[04/10] drm/i915: CHV DDR DVFS support and another watermark rewrite
diff mbox

Message ID 1435172410-9834-5-git-send-email-ville.syrjala@linux.intel.com
State New
Headers show

Commit Message

Ville Syrjälä June 24, 2015, 7 p.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

Turns out the VLV/CHV system agent doesn't understand memory
latencies, so trying to rely on the PND deadline mechanism is not
going to fly especially when DDR DVFS is enabled. Currently we try to
avoid the problems by lying to the system agent about the deadlines
and setting the FIFO watermarks to 8 cachelines. This however leads to
bad memory self refresh residency.

So in order to satosfy everyone we'll just give up on the deadline
scheme and program the watermarks old school based on the worst case
memory latency.

I've modelled this a bit on the ILK+ approach where we compute multiple
sets of watermarks for each pipe (PM2,PM5,DDR DVFS) and when merge thet
appropriate one later with the watermarks from other pipes. There isn't
too much to merge actually since each pipe has a totally independent
FIFO (well apart from the mess with the partially shared DSPARB
registers), but still decopuling the pipes from each other seems like a
good idea.

Eventually we'll want to perform the watermark update in two phases
around the plane update to avoid underruns due to the single buffered
watermark registers. But that's still in limbo for ILK+ too, so I've not
gone that far yet for VLV/CHV either.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h      |  28 +--
 drivers/gpu/drm/i915/intel_display.c |   6 +-
 drivers/gpu/drm/i915/intel_drv.h     |  11 ++
 drivers/gpu/drm/i915/intel_pm.c      | 318 ++++++++++++++++++++++++++++++++++-
 4 files changed, 345 insertions(+), 18 deletions(-)

Comments

Clinton Taylor June 26, 2015, 5:56 p.m. UTC | #1
On 06/24/2015 12:00 PM, ville.syrjala@linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> Turns out the VLV/CHV system agent doesn't understand memory
> latencies, so trying to rely on the PND deadline mechanism is not
> going to fly especially when DDR DVFS is enabled. Currently we try to
> avoid the problems by lying to the system agent about the deadlines
> and setting the FIFO watermarks to 8 cachelines. This however leads to
> bad memory self refresh residency.
>
> So in order to satosfy everyone we'll just give up on the deadline
> scheme and program the watermarks old school based on the worst case
> memory latency.
>
> I've modelled this a bit on the ILK+ approach where we compute multiple
> sets of watermarks for each pipe (PM2,PM5,DDR DVFS) and when merge thet
> appropriate one later with the watermarks from other pipes. There isn't
> too much to merge actually since each pipe has a totally independent
> FIFO (well apart from the mess with the partially shared DSPARB
> registers), but still decopuling the pipes from each other seems like a
> good idea.
>
> Eventually we'll want to perform the watermark update in two phases
> around the plane update to avoid underruns due to the single buffered
> watermark registers. But that's still in limbo for ILK+ too, so I've not
> gone that far yet for VLV/CHV either.
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h      |  28 +--
>   drivers/gpu/drm/i915/intel_display.c |   6 +-
>   drivers/gpu/drm/i915/intel_drv.h     |  11 ++
>   drivers/gpu/drm/i915/intel_pm.c      | 318 ++++++++++++++++++++++++++++++++++-
>   4 files changed, 345 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 514adcf..37cc653 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -276,6 +276,12 @@ struct i915_hotplug {
>   			    &dev->mode_config.plane_list,	\
>   			    base.head)
>
> +#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane)	\
> +	list_for_each_entry(intel_plane,				\
> +			    &(dev)->mode_config.plane_list,		\
> +			    base.head)					\
> +		if ((intel_plane)->pipe == (intel_crtc)->pipe)
> +
>   #define for_each_intel_crtc(dev, intel_crtc) \
>   	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head)
>
> @@ -1498,18 +1504,20 @@ struct ilk_wm_values {
>   	enum intel_ddb_partitioning partitioning;
>   };
>
> -struct vlv_wm_values {
> -	struct {
> -		uint16_t primary;
> -		uint16_t sprite[2];
> -		uint8_t cursor;
> -	} pipe[3];
> +struct vlv_pipe_wm {
> +	uint16_t primary;
> +	uint16_t sprite[2];
> +	uint8_t cursor;
> +};
>
> -	struct {
> -		uint16_t plane;
> -		uint8_t cursor;
> -	} sr;
> +struct vlv_sr_wm {
> +	uint16_t plane;
> +	uint8_t cursor;
> +};
>
> +struct vlv_wm_values {
> +	struct vlv_pipe_wm pipe[3];
> +	struct vlv_sr_wm sr;
>   	struct {
>   		uint8_t cursor;
>   		uint8_t sprite[2];
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index b15d57f..1424320 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -4690,8 +4690,11 @@ intel_pre_disable_primary(struct drm_crtc *crtc)
>   	 * event which is after the vblank start event, so we need to have a
>   	 * wait-for-vblank between disabling the plane and the pipe.
>   	 */
> -	if (HAS_GMCH_DISPLAY(dev))
> +	if (HAS_GMCH_DISPLAY(dev)) {
>   		intel_set_memory_cxsr(dev_priv, false);
> +		dev_priv->wm.vlv.cxsr = false;
> +		intel_wait_for_vblank(dev, pipe);
> +	}
>
>   	/*
>   	 * FIXME IPS should be fine as long as one plane is
> @@ -6005,7 +6008,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc)
>
>   	intel_crtc_load_lut(crtc);
>
> -	intel_update_watermarks(crtc);
>   	intel_enable_pipe(intel_crtc);
>
>   	assert_vblank_disabled(crtc);
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 3673a71..f26a680 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -462,6 +462,15 @@ struct intel_crtc_state {
>   	enum pipe hsw_workaround_pipe;
>   };
>
> +struct vlv_wm_state {
> +	struct vlv_pipe_wm wm[3];
> +	struct vlv_sr_wm sr[3];
> +	uint8_t num_active_planes;
> +	uint8_t num_levels;
> +	uint8_t level;
> +	bool cxsr;
> +};
> +
>   struct intel_pipe_wm {
>   	struct intel_wm_level wm[5];
>   	uint32_t linetime;
> @@ -564,6 +573,8 @@ struct intel_crtc {
>
>   	/* scalers available on this crtc */
>   	int num_scalers;
> +
> +	struct vlv_wm_state wm_state;
>   };
>
>   struct intel_plane_wm_parameters {
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e67548d..d046e5f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -335,8 +335,6 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
>   	if (IS_VALLEYVIEW(dev)) {
>   		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
>   		POSTING_READ(FW_BLC_SELF_VLV);
> -		if (IS_CHERRYVIEW(dev))
> -			chv_set_memory_pm5(dev_priv, enable);
>   	} else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
>   		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
>   		POSTING_READ(FW_BLC_SELF);
> @@ -929,8 +927,6 @@ static void vlv_write_wm_values(struct intel_crtc *crtc,
>   	}
>
>   	POSTING_READ(DSPFW1);
> -
> -	dev_priv->wm.vlv = *wm;
>   }
>
>   #undef FW_WM_VLV
> @@ -1014,6 +1010,72 @@ enum vlv_wm_level {
>   	VLV_WM_NUM_LEVELS = 1,
>   };
>
> +/* latency must be in 0.1us units. */
> +static unsigned int vlv_wm_method2(unsigned int pixel_rate,
> +				   unsigned int pipe_htotal,
> +				   unsigned int horiz_pixels,
> +				   unsigned int bytes_per_pixel,
> +				   unsigned int latency)
> +{
> +	unsigned int ret;
> +
> +	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
> +	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
> +	ret = DIV_ROUND_UP(ret, 64);
> +
> +	return ret;
> +}
> +
> +static void vlv_setup_wm_latency(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	/* all latencies in usec */
> +	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
> +
> +	if (IS_CHERRYVIEW(dev_priv)) {
> +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
> +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

nit #defines for these magic values please

> +	}
> +}
> +
> +static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
> +				     struct intel_crtc *crtc,
> +				     const struct intel_plane_state *state,
> +				     int level)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
> +	int clock, htotal, pixel_size, width, wm;
> +
> +	if (dev_priv->wm.pri_latency[level] == 0)
> +		return USHRT_MAX;
> +
> +	if (!state->visible)
> +		return 0;
> +
> +	pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
> +	clock = crtc->config->base.adjusted_mode.crtc_clock;
> +	htotal = crtc->config->base.adjusted_mode.crtc_htotal;
> +	width = crtc->config->pipe_src_w;
> +	if (WARN_ON(htotal == 0))
> +		htotal = 1;
> +
> +	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
> +		/*
> +		 * FIXME the formula gives values that are
> +		 * too big for the cursor FIFO, and hence we
> +		 * would never be able to use cursors. For
> +		 * now just hardcode the watermark.
> +		 */
> +		wm = 63;

Hard coding to maximum value of 63. Should probably be programmed to 
worst case instead of maximum.

> +	} else {
> +		wm = vlv_wm_method2(clock, htotal, width, pixel_size,
> +				    dev_priv->wm.pri_latency[level] * 10);
> +	}
> +
> +	return min_t(int, wm, USHRT_MAX);
> +}
> +
>   static bool vlv_compute_sr_wm(struct drm_device *dev,
>   			      struct vlv_wm_values *wm)
>   {
> @@ -1105,6 +1167,249 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
>
>   	if (cxsr_enabled)
>   		intel_set_memory_cxsr(dev_priv, true);
> +
> +	dev_priv->wm.vlv = wm;
> +}
> +
> +static void vlv_invert_wms(struct intel_crtc *crtc)
> +{
> +	struct vlv_wm_state *wm_state = &crtc->wm_state;
> +	int level;
> +
> +	for (level = 0; level < wm_state->num_levels; level++) {
> +		struct drm_device *dev = crtc->base.dev;
> +		const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
> +		struct intel_plane *plane;
> +
> +		wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
> +		wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
> +
> +		for_each_intel_plane_on_crtc(dev, crtc, plane) {
> +			switch (plane->base.type) {
> +				int sprite;
> +			case DRM_PLANE_TYPE_CURSOR:
> +				wm_state->wm[level].cursor = plane->wm.fifo_size -
> +					wm_state->wm[level].cursor;
> +				break;
> +			case DRM_PLANE_TYPE_PRIMARY:
> +				wm_state->wm[level].primary = plane->wm.fifo_size -
> +					wm_state->wm[level].primary;
> +				break;
> +			case DRM_PLANE_TYPE_OVERLAY:
> +				sprite = plane->plane;
> +				wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
> +					wm_state->wm[level].sprite[sprite];
> +				break;
> +			}
> +		}
> +	}
> +}
> +
> +static void _vlv_compute_wm(struct intel_crtc *crtc)
> +{
> +	struct drm_device *dev = crtc->base.dev;
> +	struct vlv_wm_state *wm_state = &crtc->wm_state;
> +	struct intel_plane *plane;
> +	int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
> +	int level;
> +
> +	memset(wm_state, 0, sizeof(*wm_state));
> +
> +	wm_state->cxsr = crtc->pipe != PIPE_C;
> +	if (IS_CHERRYVIEW(dev))
> +		wm_state->num_levels = CHV_WM_NUM_LEVELS;
> +	else
> +		wm_state->num_levels = VLV_WM_NUM_LEVELS;
> +
> +	wm_state->num_active_planes = 0;
> +	for_each_intel_plane_on_crtc(dev, crtc, plane) {
> +		struct intel_plane_state *state =
> +			to_intel_plane_state(plane->base.state);
> +
> +		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
> +			continue;
> +
> +		if (state->visible)
> +			wm_state->num_active_planes++;
> +	}
> +
> +	if (wm_state->num_active_planes != 1)
> +		wm_state->cxsr = false;
> +
> +	if (wm_state->cxsr) {
> +		for (level = 0; level < wm_state->num_levels; level++) {
> +			wm_state->sr[level].plane = sr_fifo_size;
> +			wm_state->sr[level].cursor = 63;
> +		}
> +	}
> +
> +	for_each_intel_plane_on_crtc(dev, crtc, plane) {
> +		struct intel_plane_state *state =
> +			to_intel_plane_state(plane->base.state);
> +
> +		if (!state->visible)
> +			continue;
> +
> +		/* normal watermarks */
> +		for (level = 0; level < wm_state->num_levels; level++) {
> +			int wm = vlv_compute_wm_level(plane, crtc, state, level);
> +			int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
> +
> +			/* hack */
> +			if (WARN_ON(level == 0 && wm > max_wm))
> +				wm = max_wm;
> +
> +			if (wm > plane->wm.fifo_size)
> +				break;
> +
> +			switch (plane->base.type) {
> +				int sprite;
> +			case DRM_PLANE_TYPE_CURSOR:
> +				wm_state->wm[level].cursor = wm;
> +				break;
> +			case DRM_PLANE_TYPE_PRIMARY:
> +				wm_state->wm[level].primary = wm;
> +				break;
> +			case DRM_PLANE_TYPE_OVERLAY:
> +				sprite = plane->plane;
> +				wm_state->wm[level].sprite[sprite] = wm;
> +				break;
> +			}
> +		}
> +
> +		wm_state->num_levels = level;
> +
> +		if (!wm_state->cxsr)
> +			continue;
> +
> +		/* maxfifo watermarks */
> +		switch (plane->base.type) {
> +			int sprite, level;
> +		case DRM_PLANE_TYPE_CURSOR:
> +			for (level = 0; level < wm_state->num_levels; level++)
> +				wm_state->sr[level].cursor =
> +					wm_state->sr[level].cursor;
> +			break;
> +		case DRM_PLANE_TYPE_PRIMARY:
> +			for (level = 0; level < wm_state->num_levels; level++)
> +				wm_state->sr[level].plane =
> +					min(wm_state->sr[level].plane,
> +					    wm_state->wm[level].primary);
> +			break;
> +		case DRM_PLANE_TYPE_OVERLAY:
> +			sprite = plane->plane;
> +			for (level = 0; level < wm_state->num_levels; level++)
> +				wm_state->sr[level].plane =
> +					min(wm_state->sr[level].plane,
> +					    wm_state->wm[level].sprite[sprite]);
> +			break;
> +		}
> +	}
> +
> +	/* clear any (partially) filled invalid levels */
> +	for (level = wm_state->num_levels; level < CHV_WM_NUM_LEVELS; level++) {
> +		memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
> +		memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
> +	}
> +
> +	vlv_invert_wms(crtc);
> +}
> +
> +static void vlv_merge_wm(struct drm_device *dev,
> +			 struct vlv_wm_values *wm)
> +{
> +	struct intel_crtc *crtc;
> +	int num_active_crtcs = 0;
> +
> +	if (IS_CHERRYVIEW(dev))
> +		wm->level = VLV_WM_LEVEL_DDR_DVFS;
> +	else
> +		wm->level = VLV_WM_LEVEL_PM2;
> +	wm->cxsr = true;
> +
> +	for_each_intel_crtc(dev, crtc) {
> +		const struct vlv_wm_state *wm_state = &crtc->wm_state;
> +
> +		if (!crtc->active)
> +			continue;
> +
> +		if (!wm_state->cxsr)
> +			wm->cxsr = false;
> +
> +		num_active_crtcs++;
> +		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
> +	}
> +
> +	if (num_active_crtcs != 1)
> +		wm->cxsr = false;
> +
> +	for_each_intel_crtc(dev, crtc) {
> +		struct vlv_wm_state *wm_state = &crtc->wm_state;
> +		enum pipe pipe = crtc->pipe;
> +
> +		if (!crtc->active)
> +			continue;
> +
> +		wm->pipe[pipe] = wm_state->wm[wm->level];
> +		if (wm->cxsr)
> +			wm->sr = wm_state->sr[wm->level];
> +
> +		wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
> +		wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
> +		wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
> +		wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;

Did we really decide that 0x2 was the final correct value for the DL 
registers? I figured we would be running 0x7 for CHV.

> +	}
> +}
> +
> +static void vlv_update_wm(struct drm_crtc *crtc)
> +{
> +	struct drm_device *dev = crtc->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	enum pipe pipe = intel_crtc->pipe;
> +	struct vlv_wm_values wm = {};
> +
> +	_vlv_compute_wm(intel_crtc);
> +	vlv_merge_wm(dev, &wm);
> +
> +	if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0)
> +		return;
> +
> +	if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
> +	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
> +		chv_set_memory_dvfs(dev_priv, false);
> +
> +	if (wm.level < VLV_WM_LEVEL_PM5 &&
> +	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
> +		chv_set_memory_pm5(dev_priv, false);
> +
> +	if (!wm.cxsr && dev_priv->wm.vlv.cxsr) {
> +		intel_set_memory_cxsr(dev_priv, false);
> +		intel_wait_for_vblank(dev, pipe);
> +	}
> +
> +	vlv_write_wm_values(intel_crtc, &wm);
> +
> +	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
> +		      "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
> +		      pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
> +		      wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
> +		      wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
> +

Love the detailed DRM_DEBUG_KMS - of course its showing the only real 
issue I have found in the series that the cursor watermark is being 
programmed inverted. 63 when the cursor is disabled and 0 when its 
enabled. This leads to an SRR% increase of ~10% when the cursor is 
enabled.

// Cursor disabled
[ 2662.355218] [drm:vlv_update_wm] Setting FIFO watermarks - A: 
plane=340, cursor=63, sprite0=0, sprite1=0, SR: plane=1364, cursor=0 
level=2 cxsr=1

// Cursor Enabled
[ 2667.531049] [drm:vlv_update_wm] Setting FIFO watermarks - A: 
plane=340, cursor=0, sprite0=0, sprite1=0, SR: plane=1364, cursor=0 
level=2 cxsr=1

I haven't found the line of code that actually inverts this programming. 
I will continue to investigate during testing of this series.


> +	if (wm.cxsr && !dev_priv->wm.vlv.cxsr) {
> +		intel_wait_for_vblank(dev, pipe);
> +		intel_set_memory_cxsr(dev_priv, true);
> +	}
> +
> +	if (wm.level >= VLV_WM_LEVEL_PM5 &&
> +	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
> +		chv_set_memory_pm5(dev_priv, true);
> +
> +	if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
> +	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
> +		chv_set_memory_dvfs(dev_priv, true);
> +
> +	dev_priv->wm.vlv = wm;
>   }
>
>   static void valleyview_update_sprite_wm(struct drm_plane *plane,
> @@ -6823,8 +7128,9 @@ void intel_init_pm(struct drm_device *dev)
>   		else if (INTEL_INFO(dev)->gen == 8)
>   			dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
>   	} else if (IS_CHERRYVIEW(dev)) {
> -		dev_priv->display.update_wm = valleyview_update_wm;
> -		dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
> +		vlv_setup_wm_latency(dev);
> +
> +		dev_priv->display.update_wm = vlv_update_wm;
>   		dev_priv->display.init_clock_gating =
>   			cherryview_init_clock_gating;
>   	} else if (IS_VALLEYVIEW(dev)) {
>
Ville Syrjälä June 26, 2015, 7:48 p.m. UTC | #2
On Fri, Jun 26, 2015 at 10:56:33AM -0700, Clint Taylor wrote:
> On 06/24/2015 12:00 PM, ville.syrjala@linux.intel.com wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >
> > Turns out the VLV/CHV system agent doesn't understand memory
> > latencies, so trying to rely on the PND deadline mechanism is not
> > going to fly especially when DDR DVFS is enabled. Currently we try to
> > avoid the problems by lying to the system agent about the deadlines
> > and setting the FIFO watermarks to 8 cachelines. This however leads to
> > bad memory self refresh residency.
> >
> > So in order to satosfy everyone we'll just give up on the deadline
> > scheme and program the watermarks old school based on the worst case
> > memory latency.
> >
> > I've modelled this a bit on the ILK+ approach where we compute multiple
> > sets of watermarks for each pipe (PM2,PM5,DDR DVFS) and when merge thet
> > appropriate one later with the watermarks from other pipes. There isn't
> > too much to merge actually since each pipe has a totally independent
> > FIFO (well apart from the mess with the partially shared DSPARB
> > registers), but still decopuling the pipes from each other seems like a
> > good idea.
> >
> > Eventually we'll want to perform the watermark update in two phases
> > around the plane update to avoid underruns due to the single buffered
> > watermark registers. But that's still in limbo for ILK+ too, so I've not
> > gone that far yet for VLV/CHV either.
> >
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_drv.h      |  28 +--
> >   drivers/gpu/drm/i915/intel_display.c |   6 +-
> >   drivers/gpu/drm/i915/intel_drv.h     |  11 ++
> >   drivers/gpu/drm/i915/intel_pm.c      | 318 ++++++++++++++++++++++++++++++++++-
> >   4 files changed, 345 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 514adcf..37cc653 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -276,6 +276,12 @@ struct i915_hotplug {
> >   			    &dev->mode_config.plane_list,	\
> >   			    base.head)
> >
> > +#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane)	\
> > +	list_for_each_entry(intel_plane,				\
> > +			    &(dev)->mode_config.plane_list,		\
> > +			    base.head)					\
> > +		if ((intel_plane)->pipe == (intel_crtc)->pipe)
> > +
> >   #define for_each_intel_crtc(dev, intel_crtc) \
> >   	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head)
> >
> > @@ -1498,18 +1504,20 @@ struct ilk_wm_values {
> >   	enum intel_ddb_partitioning partitioning;
> >   };
> >
> > -struct vlv_wm_values {
> > -	struct {
> > -		uint16_t primary;
> > -		uint16_t sprite[2];
> > -		uint8_t cursor;
> > -	} pipe[3];
> > +struct vlv_pipe_wm {
> > +	uint16_t primary;
> > +	uint16_t sprite[2];
> > +	uint8_t cursor;
> > +};
> >
> > -	struct {
> > -		uint16_t plane;
> > -		uint8_t cursor;
> > -	} sr;
> > +struct vlv_sr_wm {
> > +	uint16_t plane;
> > +	uint8_t cursor;
> > +};
> >
> > +struct vlv_wm_values {
> > +	struct vlv_pipe_wm pipe[3];
> > +	struct vlv_sr_wm sr;
> >   	struct {
> >   		uint8_t cursor;
> >   		uint8_t sprite[2];
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index b15d57f..1424320 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -4690,8 +4690,11 @@ intel_pre_disable_primary(struct drm_crtc *crtc)
> >   	 * event which is after the vblank start event, so we need to have a
> >   	 * wait-for-vblank between disabling the plane and the pipe.
> >   	 */
> > -	if (HAS_GMCH_DISPLAY(dev))
> > +	if (HAS_GMCH_DISPLAY(dev)) {
> >   		intel_set_memory_cxsr(dev_priv, false);
> > +		dev_priv->wm.vlv.cxsr = false;
> > +		intel_wait_for_vblank(dev, pipe);
> > +	}
> >
> >   	/*
> >   	 * FIXME IPS should be fine as long as one plane is
> > @@ -6005,7 +6008,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc)
> >
> >   	intel_crtc_load_lut(crtc);
> >
> > -	intel_update_watermarks(crtc);
> >   	intel_enable_pipe(intel_crtc);
> >
> >   	assert_vblank_disabled(crtc);
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index 3673a71..f26a680 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -462,6 +462,15 @@ struct intel_crtc_state {
> >   	enum pipe hsw_workaround_pipe;
> >   };
> >
> > +struct vlv_wm_state {
> > +	struct vlv_pipe_wm wm[3];
> > +	struct vlv_sr_wm sr[3];
> > +	uint8_t num_active_planes;
> > +	uint8_t num_levels;
> > +	uint8_t level;
> > +	bool cxsr;
> > +};
> > +
> >   struct intel_pipe_wm {
> >   	struct intel_wm_level wm[5];
> >   	uint32_t linetime;
> > @@ -564,6 +573,8 @@ struct intel_crtc {
> >
> >   	/* scalers available on this crtc */
> >   	int num_scalers;
> > +
> > +	struct vlv_wm_state wm_state;
> >   };
> >
> >   struct intel_plane_wm_parameters {
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index e67548d..d046e5f 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -335,8 +335,6 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
> >   	if (IS_VALLEYVIEW(dev)) {
> >   		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
> >   		POSTING_READ(FW_BLC_SELF_VLV);
> > -		if (IS_CHERRYVIEW(dev))
> > -			chv_set_memory_pm5(dev_priv, enable);
> >   	} else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
> >   		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
> >   		POSTING_READ(FW_BLC_SELF);
> > @@ -929,8 +927,6 @@ static void vlv_write_wm_values(struct intel_crtc *crtc,
> >   	}
> >
> >   	POSTING_READ(DSPFW1);
> > -
> > -	dev_priv->wm.vlv = *wm;
> >   }
> >
> >   #undef FW_WM_VLV
> > @@ -1014,6 +1010,72 @@ enum vlv_wm_level {
> >   	VLV_WM_NUM_LEVELS = 1,
> >   };
> >
> > +/* latency must be in 0.1us units. */
> > +static unsigned int vlv_wm_method2(unsigned int pixel_rate,
> > +				   unsigned int pipe_htotal,
> > +				   unsigned int horiz_pixels,
> > +				   unsigned int bytes_per_pixel,
> > +				   unsigned int latency)
> > +{
> > +	unsigned int ret;
> > +
> > +	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
> > +	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
> > +	ret = DIV_ROUND_UP(ret, 64);
> > +
> > +	return ret;
> > +}
> > +
> > +static void vlv_setup_wm_latency(struct drm_device *dev)
> > +{
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +
> > +	/* all latencies in usec */
> > +	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
> > +
> > +	if (IS_CHERRYVIEW(dev_priv)) {
> > +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
> > +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
> 
> nit #defines for these magic values please

What's the point of doing that? These values are not repeated anywhere
else.

> 
> > +	}
> > +}
> > +
> > +static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
> > +				     struct intel_crtc *crtc,
> > +				     const struct intel_plane_state *state,
> > +				     int level)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
> > +	int clock, htotal, pixel_size, width, wm;
> > +
> > +	if (dev_priv->wm.pri_latency[level] == 0)
> > +		return USHRT_MAX;
> > +
> > +	if (!state->visible)
> > +		return 0;
> > +
> > +	pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
> > +	clock = crtc->config->base.adjusted_mode.crtc_clock;
> > +	htotal = crtc->config->base.adjusted_mode.crtc_htotal;
> > +	width = crtc->config->pipe_src_w;
> > +	if (WARN_ON(htotal == 0))
> > +		htotal = 1;
> > +
> > +	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
> > +		/*
> > +		 * FIXME the formula gives values that are
> > +		 * too big for the cursor FIFO, and hence we
> > +		 * would never be able to use cursors. For
> > +		 * now just hardcode the watermark.
> > +		 */
> > +		wm = 63;
> 
> Hard coding to maximum value of 63. Should probably be programmed to 
> worst case instead of maximum.

I have no idea what's the worst case. I was too lazy to try to
empirically deduce some kind of number/formula that works all the time.
Since it's a very small plane usually hardcoding it like this shouldn't
hurt too much (I hope). We can't go above 63, so if that fails we're
screwed anyway.

> > +	} else {
> > +		wm = vlv_wm_method2(clock, htotal, width, pixel_size,
> > +				    dev_priv->wm.pri_latency[level] * 10);
> > +	}
> > +
> > +	return min_t(int, wm, USHRT_MAX);
> > +}
> > +
> >   static bool vlv_compute_sr_wm(struct drm_device *dev,
> >   			      struct vlv_wm_values *wm)
> >   {
> > @@ -1105,6 +1167,249 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
> >
> >   	if (cxsr_enabled)
> >   		intel_set_memory_cxsr(dev_priv, true);
> > +
> > +	dev_priv->wm.vlv = wm;
> > +}
> > +
> > +static void vlv_invert_wms(struct intel_crtc *crtc)
> > +{
> > +	struct vlv_wm_state *wm_state = &crtc->wm_state;
> > +	int level;
> > +
> > +	for (level = 0; level < wm_state->num_levels; level++) {
> > +		struct drm_device *dev = crtc->base.dev;
> > +		const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
> > +		struct intel_plane *plane;
> > +
> > +		wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
> > +		wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
> > +
> > +		for_each_intel_plane_on_crtc(dev, crtc, plane) {
> > +			switch (plane->base.type) {
> > +				int sprite;
> > +			case DRM_PLANE_TYPE_CURSOR:
> > +				wm_state->wm[level].cursor = plane->wm.fifo_size -
> > +					wm_state->wm[level].cursor;
> > +				break;
> > +			case DRM_PLANE_TYPE_PRIMARY:
> > +				wm_state->wm[level].primary = plane->wm.fifo_size -
> > +					wm_state->wm[level].primary;
> > +				break;
> > +			case DRM_PLANE_TYPE_OVERLAY:
> > +				sprite = plane->plane;
> > +				wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
> > +					wm_state->wm[level].sprite[sprite];
> > +				break;
> > +			}
> > +		}
> > +	}
> > +}
> > +
> > +static void _vlv_compute_wm(struct intel_crtc *crtc)
> > +{
> > +	struct drm_device *dev = crtc->base.dev;
> > +	struct vlv_wm_state *wm_state = &crtc->wm_state;
> > +	struct intel_plane *plane;
> > +	int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
> > +	int level;
> > +
> > +	memset(wm_state, 0, sizeof(*wm_state));
> > +
> > +	wm_state->cxsr = crtc->pipe != PIPE_C;
> > +	if (IS_CHERRYVIEW(dev))
> > +		wm_state->num_levels = CHV_WM_NUM_LEVELS;
> > +	else
> > +		wm_state->num_levels = VLV_WM_NUM_LEVELS;
> > +
> > +	wm_state->num_active_planes = 0;
> > +	for_each_intel_plane_on_crtc(dev, crtc, plane) {
> > +		struct intel_plane_state *state =
> > +			to_intel_plane_state(plane->base.state);
> > +
> > +		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
> > +			continue;
> > +
> > +		if (state->visible)
> > +			wm_state->num_active_planes++;
> > +	}
> > +
> > +	if (wm_state->num_active_planes != 1)
> > +		wm_state->cxsr = false;
> > +
> > +	if (wm_state->cxsr) {
> > +		for (level = 0; level < wm_state->num_levels; level++) {
> > +			wm_state->sr[level].plane = sr_fifo_size;
> > +			wm_state->sr[level].cursor = 63;
> > +		}
> > +	}
> > +
> > +	for_each_intel_plane_on_crtc(dev, crtc, plane) {
> > +		struct intel_plane_state *state =
> > +			to_intel_plane_state(plane->base.state);
> > +
> > +		if (!state->visible)
> > +			continue;
> > +
> > +		/* normal watermarks */
> > +		for (level = 0; level < wm_state->num_levels; level++) {
> > +			int wm = vlv_compute_wm_level(plane, crtc, state, level);
> > +			int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
> > +
> > +			/* hack */
> > +			if (WARN_ON(level == 0 && wm > max_wm))
> > +				wm = max_wm;
> > +
> > +			if (wm > plane->wm.fifo_size)
> > +				break;
> > +
> > +			switch (plane->base.type) {
> > +				int sprite;
> > +			case DRM_PLANE_TYPE_CURSOR:
> > +				wm_state->wm[level].cursor = wm;
> > +				break;
> > +			case DRM_PLANE_TYPE_PRIMARY:
> > +				wm_state->wm[level].primary = wm;
> > +				break;
> > +			case DRM_PLANE_TYPE_OVERLAY:
> > +				sprite = plane->plane;
> > +				wm_state->wm[level].sprite[sprite] = wm;
> > +				break;
> > +			}
> > +		}
> > +
> > +		wm_state->num_levels = level;
> > +
> > +		if (!wm_state->cxsr)
> > +			continue;
> > +
> > +		/* maxfifo watermarks */
> > +		switch (plane->base.type) {
> > +			int sprite, level;
> > +		case DRM_PLANE_TYPE_CURSOR:
> > +			for (level = 0; level < wm_state->num_levels; level++)
> > +				wm_state->sr[level].cursor =
> > +					wm_state->sr[level].cursor;
> > +			break;
> > +		case DRM_PLANE_TYPE_PRIMARY:
> > +			for (level = 0; level < wm_state->num_levels; level++)
> > +				wm_state->sr[level].plane =
> > +					min(wm_state->sr[level].plane,
> > +					    wm_state->wm[level].primary);
> > +			break;
> > +		case DRM_PLANE_TYPE_OVERLAY:
> > +			sprite = plane->plane;
> > +			for (level = 0; level < wm_state->num_levels; level++)
> > +				wm_state->sr[level].plane =
> > +					min(wm_state->sr[level].plane,
> > +					    wm_state->wm[level].sprite[sprite]);
> > +			break;
> > +		}
> > +	}
> > +
> > +	/* clear any (partially) filled invalid levels */
> > +	for (level = wm_state->num_levels; level < CHV_WM_NUM_LEVELS; level++) {
> > +		memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
> > +		memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
> > +	}
> > +
> > +	vlv_invert_wms(crtc);
> > +}
> > +
> > +static void vlv_merge_wm(struct drm_device *dev,
> > +			 struct vlv_wm_values *wm)
> > +{
> > +	struct intel_crtc *crtc;
> > +	int num_active_crtcs = 0;
> > +
> > +	if (IS_CHERRYVIEW(dev))
> > +		wm->level = VLV_WM_LEVEL_DDR_DVFS;
> > +	else
> > +		wm->level = VLV_WM_LEVEL_PM2;
> > +	wm->cxsr = true;
> > +
> > +	for_each_intel_crtc(dev, crtc) {
> > +		const struct vlv_wm_state *wm_state = &crtc->wm_state;
> > +
> > +		if (!crtc->active)
> > +			continue;
> > +
> > +		if (!wm_state->cxsr)
> > +			wm->cxsr = false;
> > +
> > +		num_active_crtcs++;
> > +		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
> > +	}
> > +
> > +	if (num_active_crtcs != 1)
> > +		wm->cxsr = false;
> > +
> > +	for_each_intel_crtc(dev, crtc) {
> > +		struct vlv_wm_state *wm_state = &crtc->wm_state;
> > +		enum pipe pipe = crtc->pipe;
> > +
> > +		if (!crtc->active)
> > +			continue;
> > +
> > +		wm->pipe[pipe] = wm_state->wm[wm->level];
> > +		if (wm->cxsr)
> > +			wm->sr = wm_state->sr[wm->level];
> > +
> > +		wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
> > +		wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
> > +		wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
> > +		wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
> 
> Did we really decide that 0x2 was the final correct value for the DL 
> registers? I figured we would be running 0x7 for CHV.

I'm wary of increasing it too much. That'll give the system agent a
better chance of making a mess of things.

> 
> > +	}
> > +}
> > +
> > +static void vlv_update_wm(struct drm_crtc *crtc)
> > +{
> > +	struct drm_device *dev = crtc->dev;
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> > +	enum pipe pipe = intel_crtc->pipe;
> > +	struct vlv_wm_values wm = {};
> > +
> > +	_vlv_compute_wm(intel_crtc);
> > +	vlv_merge_wm(dev, &wm);
> > +
> > +	if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0)
> > +		return;
> > +
> > +	if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
> > +	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
> > +		chv_set_memory_dvfs(dev_priv, false);
> > +
> > +	if (wm.level < VLV_WM_LEVEL_PM5 &&
> > +	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
> > +		chv_set_memory_pm5(dev_priv, false);
> > +
> > +	if (!wm.cxsr && dev_priv->wm.vlv.cxsr) {
> > +		intel_set_memory_cxsr(dev_priv, false);
> > +		intel_wait_for_vblank(dev, pipe);
> > +	}
> > +
> > +	vlv_write_wm_values(intel_crtc, &wm);
> > +
> > +	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
> > +		      "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
> > +		      pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
> > +		      wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
> > +		      wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
> > +
> 
> Love the detailed DRM_DEBUG_KMS - of course its showing the only real 
> issue I have found in the series that the cursor watermark is being 
> programmed inverted. 63 when the cursor is disabled and 0 when its 
> enabled. This leads to an SRR% increase of ~10% when the cursor is 
> enabled.
> 
> // Cursor disabled
> [ 2662.355218] [drm:vlv_update_wm] Setting FIFO watermarks - A: 
> plane=340, cursor=63, sprite0=0, sprite1=0, SR: plane=1364, cursor=0 
> level=2 cxsr=1
> 
> // Cursor Enabled
> [ 2667.531049] [drm:vlv_update_wm] Setting FIFO watermarks - A: 
> plane=340, cursor=0, sprite0=0, sprite1=0, SR: plane=1364, cursor=0 
> level=2 cxsr=1
> 
> I haven't found the line of code that actually inverts this programming. 
> I will continue to investigate during testing of this series.

vlv_invert_wms()

That's done on purpose since calculating the watermarks the "right way
up" first makes makes it easier to think about them (ie. to compare with
the FIFO size, and to merge the plane SR watermark). It also make things
looks more ILK-like since ILK+ take the watermarks in the non-inverted
form.

> 
> 
> > +	if (wm.cxsr && !dev_priv->wm.vlv.cxsr) {
> > +		intel_wait_for_vblank(dev, pipe);
> > +		intel_set_memory_cxsr(dev_priv, true);
> > +	}
> > +
> > +	if (wm.level >= VLV_WM_LEVEL_PM5 &&
> > +	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
> > +		chv_set_memory_pm5(dev_priv, true);
> > +
> > +	if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
> > +	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
> > +		chv_set_memory_dvfs(dev_priv, true);
> > +
> > +	dev_priv->wm.vlv = wm;
> >   }
> >
> >   static void valleyview_update_sprite_wm(struct drm_plane *plane,
> > @@ -6823,8 +7128,9 @@ void intel_init_pm(struct drm_device *dev)
> >   		else if (INTEL_INFO(dev)->gen == 8)
> >   			dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
> >   	} else if (IS_CHERRYVIEW(dev)) {
> > -		dev_priv->display.update_wm = valleyview_update_wm;
> > -		dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
> > +		vlv_setup_wm_latency(dev);
> > +
> > +		dev_priv->display.update_wm = vlv_update_wm;
> >   		dev_priv->display.init_clock_gating =
> >   			cherryview_init_clock_gating;
> >   	} else if (IS_VALLEYVIEW(dev)) {
> >
Clinton Taylor June 26, 2015, 8:21 p.m. UTC | #3
On 06/26/2015 12:48 PM, Ville Syrjälä wrote:
> On Fri, Jun 26, 2015 at 10:56:33AM -0700, Clint Taylor wrote:
>> On 06/24/2015 12:00 PM, ville.syrjala@linux.intel.com wrote:
>>> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>>
>>> Turns out the VLV/CHV system agent doesn't understand memory
>>> latencies, so trying to rely on the PND deadline mechanism is not
>>> going to fly especially when DDR DVFS is enabled. Currently we try to
>>> avoid the problems by lying to the system agent about the deadlines
>>> and setting the FIFO watermarks to 8 cachelines. This however leads to
>>> bad memory self refresh residency.
>>>
>>> So in order to satosfy everyone we'll just give up on the deadline
>>> scheme and program the watermarks old school based on the worst case
>>> memory latency.
>>>
>>> I've modelled this a bit on the ILK+ approach where we compute multiple
>>> sets of watermarks for each pipe (PM2,PM5,DDR DVFS) and when merge thet
>>> appropriate one later with the watermarks from other pipes. There isn't
>>> too much to merge actually since each pipe has a totally independent
>>> FIFO (well apart from the mess with the partially shared DSPARB
>>> registers), but still decopuling the pipes from each other seems like a
>>> good idea.
>>>
>>> Eventually we'll want to perform the watermark update in two phases
>>> around the plane update to avoid underruns due to the single buffered
>>> watermark registers. But that's still in limbo for ILK+ too, so I've not
>>> gone that far yet for VLV/CHV either.
>>>
>>> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_drv.h      |  28 +--
>>>    drivers/gpu/drm/i915/intel_display.c |   6 +-
>>>    drivers/gpu/drm/i915/intel_drv.h     |  11 ++
>>>    drivers/gpu/drm/i915/intel_pm.c      | 318 ++++++++++++++++++++++++++++++++++-
>>>    4 files changed, 345 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 514adcf..37cc653 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -276,6 +276,12 @@ struct i915_hotplug {
>>>    			    &dev->mode_config.plane_list,	\
>>>    			    base.head)
>>>
>>> +#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane)	\
>>> +	list_for_each_entry(intel_plane,				\
>>> +			    &(dev)->mode_config.plane_list,		\
>>> +			    base.head)					\
>>> +		if ((intel_plane)->pipe == (intel_crtc)->pipe)
>>> +
>>>    #define for_each_intel_crtc(dev, intel_crtc) \
>>>    	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head)
>>>
>>> @@ -1498,18 +1504,20 @@ struct ilk_wm_values {
>>>    	enum intel_ddb_partitioning partitioning;
>>>    };
>>>
>>> -struct vlv_wm_values {
>>> -	struct {
>>> -		uint16_t primary;
>>> -		uint16_t sprite[2];
>>> -		uint8_t cursor;
>>> -	} pipe[3];
>>> +struct vlv_pipe_wm {
>>> +	uint16_t primary;
>>> +	uint16_t sprite[2];
>>> +	uint8_t cursor;
>>> +};
>>>
>>> -	struct {
>>> -		uint16_t plane;
>>> -		uint8_t cursor;
>>> -	} sr;
>>> +struct vlv_sr_wm {
>>> +	uint16_t plane;
>>> +	uint8_t cursor;
>>> +};
>>>
>>> +struct vlv_wm_values {
>>> +	struct vlv_pipe_wm pipe[3];
>>> +	struct vlv_sr_wm sr;
>>>    	struct {
>>>    		uint8_t cursor;
>>>    		uint8_t sprite[2];
>>> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>>> index b15d57f..1424320 100644
>>> --- a/drivers/gpu/drm/i915/intel_display.c
>>> +++ b/drivers/gpu/drm/i915/intel_display.c
>>> @@ -4690,8 +4690,11 @@ intel_pre_disable_primary(struct drm_crtc *crtc)
>>>    	 * event which is after the vblank start event, so we need to have a
>>>    	 * wait-for-vblank between disabling the plane and the pipe.
>>>    	 */
>>> -	if (HAS_GMCH_DISPLAY(dev))
>>> +	if (HAS_GMCH_DISPLAY(dev)) {
>>>    		intel_set_memory_cxsr(dev_priv, false);
>>> +		dev_priv->wm.vlv.cxsr = false;
>>> +		intel_wait_for_vblank(dev, pipe);
>>> +	}
>>>
>>>    	/*
>>>    	 * FIXME IPS should be fine as long as one plane is
>>> @@ -6005,7 +6008,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc)
>>>
>>>    	intel_crtc_load_lut(crtc);
>>>
>>> -	intel_update_watermarks(crtc);
>>>    	intel_enable_pipe(intel_crtc);
>>>
>>>    	assert_vblank_disabled(crtc);
>>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>>> index 3673a71..f26a680 100644
>>> --- a/drivers/gpu/drm/i915/intel_drv.h
>>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>>> @@ -462,6 +462,15 @@ struct intel_crtc_state {
>>>    	enum pipe hsw_workaround_pipe;
>>>    };
>>>
>>> +struct vlv_wm_state {
>>> +	struct vlv_pipe_wm wm[3];
>>> +	struct vlv_sr_wm sr[3];
>>> +	uint8_t num_active_planes;
>>> +	uint8_t num_levels;
>>> +	uint8_t level;
>>> +	bool cxsr;
>>> +};
>>> +
>>>    struct intel_pipe_wm {
>>>    	struct intel_wm_level wm[5];
>>>    	uint32_t linetime;
>>> @@ -564,6 +573,8 @@ struct intel_crtc {
>>>
>>>    	/* scalers available on this crtc */
>>>    	int num_scalers;
>>> +
>>> +	struct vlv_wm_state wm_state;
>>>    };
>>>
>>>    struct intel_plane_wm_parameters {
>>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>>> index e67548d..d046e5f 100644
>>> --- a/drivers/gpu/drm/i915/intel_pm.c
>>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>>> @@ -335,8 +335,6 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
>>>    	if (IS_VALLEYVIEW(dev)) {
>>>    		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
>>>    		POSTING_READ(FW_BLC_SELF_VLV);
>>> -		if (IS_CHERRYVIEW(dev))
>>> -			chv_set_memory_pm5(dev_priv, enable);
>>>    	} else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
>>>    		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
>>>    		POSTING_READ(FW_BLC_SELF);
>>> @@ -929,8 +927,6 @@ static void vlv_write_wm_values(struct intel_crtc *crtc,
>>>    	}
>>>
>>>    	POSTING_READ(DSPFW1);
>>> -
>>> -	dev_priv->wm.vlv = *wm;
>>>    }
>>>
>>>    #undef FW_WM_VLV
>>> @@ -1014,6 +1010,72 @@ enum vlv_wm_level {
>>>    	VLV_WM_NUM_LEVELS = 1,
>>>    };
>>>
>>> +/* latency must be in 0.1us units. */
>>> +static unsigned int vlv_wm_method2(unsigned int pixel_rate,
>>> +				   unsigned int pipe_htotal,
>>> +				   unsigned int horiz_pixels,
>>> +				   unsigned int bytes_per_pixel,
>>> +				   unsigned int latency)
>>> +{
>>> +	unsigned int ret;
>>> +
>>> +	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
>>> +	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
>>> +	ret = DIV_ROUND_UP(ret, 64);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +static void vlv_setup_wm_latency(struct drm_device *dev)
>>> +{
>>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>>> +
>>> +	/* all latencies in usec */
>>> +	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
>>> +
>>> +	if (IS_CHERRYVIEW(dev_priv)) {
>>> +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
>>> +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
>>
>> nit #defines for these magic values please
>
> What's the point of doing that? These values are not repeated anywhere
> else.
>
>>
>>> +	}
>>> +}
>>> +
>>> +static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
>>> +				     struct intel_crtc *crtc,
>>> +				     const struct intel_plane_state *state,
>>> +				     int level)
>>> +{
>>> +	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
>>> +	int clock, htotal, pixel_size, width, wm;
>>> +
>>> +	if (dev_priv->wm.pri_latency[level] == 0)
>>> +		return USHRT_MAX;
>>> +
>>> +	if (!state->visible)
>>> +		return 0;
>>> +
>>> +	pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
>>> +	clock = crtc->config->base.adjusted_mode.crtc_clock;
>>> +	htotal = crtc->config->base.adjusted_mode.crtc_htotal;
>>> +	width = crtc->config->pipe_src_w;
>>> +	if (WARN_ON(htotal == 0))
>>> +		htotal = 1;
>>> +
>>> +	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
>>> +		/*
>>> +		 * FIXME the formula gives values that are
>>> +		 * too big for the cursor FIFO, and hence we
>>> +		 * would never be able to use cursors. For
>>> +		 * now just hardcode the watermark.
>>> +		 */
>>> +		wm = 63;
>>
>> Hard coding to maximum value of 63. Should probably be programmed to
>> worst case instead of maximum.
>
> I have no idea what's the worst case. I was too lazy to try to
> empirically deduce some kind of number/formula that works all the time.
> Since it's a very small plane usually hardcoding it like this shouldn't
> hurt too much (I hope). We can't go above 63, so if that fails we're
> screwed anyway.
>

63 gets inverted to 0 and we are screwed if that doesn't work.

>>> +	} else {
>>> +		wm = vlv_wm_method2(clock, htotal, width, pixel_size,
>>> +				    dev_priv->wm.pri_latency[level] * 10);
>>> +	}
>>> +
>>> +	return min_t(int, wm, USHRT_MAX);
>>> +}
>>> +
>>>    static bool vlv_compute_sr_wm(struct drm_device *dev,
>>>    			      struct vlv_wm_values *wm)
>>>    {
>>> @@ -1105,6 +1167,249 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
>>>
>>>    	if (cxsr_enabled)
>>>    		intel_set_memory_cxsr(dev_priv, true);
>>> +
>>> +	dev_priv->wm.vlv = wm;
>>> +}
>>> +
>>> +static void vlv_invert_wms(struct intel_crtc *crtc)
>>> +{
>>> +	struct vlv_wm_state *wm_state = &crtc->wm_state;
>>> +	int level;
>>> +
>>> +	for (level = 0; level < wm_state->num_levels; level++) {
>>> +		struct drm_device *dev = crtc->base.dev;
>>> +		const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
>>> +		struct intel_plane *plane;
>>> +
>>> +		wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
>>> +		wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
>>> +
>>> +		for_each_intel_plane_on_crtc(dev, crtc, plane) {
>>> +			switch (plane->base.type) {
>>> +				int sprite;
>>> +			case DRM_PLANE_TYPE_CURSOR:
>>> +				wm_state->wm[level].cursor = plane->wm.fifo_size -
>>> +					wm_state->wm[level].cursor;
>>> +				break;
>>> +			case DRM_PLANE_TYPE_PRIMARY:
>>> +				wm_state->wm[level].primary = plane->wm.fifo_size -
>>> +					wm_state->wm[level].primary;
>>> +				break;
>>> +			case DRM_PLANE_TYPE_OVERLAY:
>>> +				sprite = plane->plane;
>>> +				wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
>>> +					wm_state->wm[level].sprite[sprite];
>>> +				break;
>>> +			}
>>> +		}
>>> +	}
>>> +}
>>> +
>>> +static void _vlv_compute_wm(struct intel_crtc *crtc)
>>> +{
>>> +	struct drm_device *dev = crtc->base.dev;
>>> +	struct vlv_wm_state *wm_state = &crtc->wm_state;
>>> +	struct intel_plane *plane;
>>> +	int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
>>> +	int level;
>>> +
>>> +	memset(wm_state, 0, sizeof(*wm_state));
>>> +
>>> +	wm_state->cxsr = crtc->pipe != PIPE_C;
>>> +	if (IS_CHERRYVIEW(dev))
>>> +		wm_state->num_levels = CHV_WM_NUM_LEVELS;
>>> +	else
>>> +		wm_state->num_levels = VLV_WM_NUM_LEVELS;
>>> +
>>> +	wm_state->num_active_planes = 0;
>>> +	for_each_intel_plane_on_crtc(dev, crtc, plane) {
>>> +		struct intel_plane_state *state =
>>> +			to_intel_plane_state(plane->base.state);
>>> +
>>> +		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
>>> +			continue;
>>> +
>>> +		if (state->visible)
>>> +			wm_state->num_active_planes++;
>>> +	}
>>> +
>>> +	if (wm_state->num_active_planes != 1)
>>> +		wm_state->cxsr = false;
>>> +
>>> +	if (wm_state->cxsr) {
>>> +		for (level = 0; level < wm_state->num_levels; level++) {
>>> +			wm_state->sr[level].plane = sr_fifo_size;
>>> +			wm_state->sr[level].cursor = 63;
>>> +		}
>>> +	}
>>> +
>>> +	for_each_intel_plane_on_crtc(dev, crtc, plane) {
>>> +		struct intel_plane_state *state =
>>> +			to_intel_plane_state(plane->base.state);
>>> +
>>> +		if (!state->visible)
>>> +			continue;
>>> +
>>> +		/* normal watermarks */
>>> +		for (level = 0; level < wm_state->num_levels; level++) {
>>> +			int wm = vlv_compute_wm_level(plane, crtc, state, level);
>>> +			int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
>>> +
>>> +			/* hack */
>>> +			if (WARN_ON(level == 0 && wm > max_wm))
>>> +				wm = max_wm;
>>> +
>>> +			if (wm > plane->wm.fifo_size)
>>> +				break;
>>> +
>>> +			switch (plane->base.type) {
>>> +				int sprite;
>>> +			case DRM_PLANE_TYPE_CURSOR:
>>> +				wm_state->wm[level].cursor = wm;
>>> +				break;
>>> +			case DRM_PLANE_TYPE_PRIMARY:
>>> +				wm_state->wm[level].primary = wm;
>>> +				break;
>>> +			case DRM_PLANE_TYPE_OVERLAY:
>>> +				sprite = plane->plane;
>>> +				wm_state->wm[level].sprite[sprite] = wm;
>>> +				break;
>>> +			}
>>> +		}
>>> +
>>> +		wm_state->num_levels = level;
>>> +
>>> +		if (!wm_state->cxsr)
>>> +			continue;
>>> +
>>> +		/* maxfifo watermarks */
>>> +		switch (plane->base.type) {
>>> +			int sprite, level;
>>> +		case DRM_PLANE_TYPE_CURSOR:
>>> +			for (level = 0; level < wm_state->num_levels; level++)
>>> +				wm_state->sr[level].cursor =
>>> +					wm_state->sr[level].cursor;
>>> +			break;
>>> +		case DRM_PLANE_TYPE_PRIMARY:
>>> +			for (level = 0; level < wm_state->num_levels; level++)
>>> +				wm_state->sr[level].plane =
>>> +					min(wm_state->sr[level].plane,
>>> +					    wm_state->wm[level].primary);
>>> +			break;
>>> +		case DRM_PLANE_TYPE_OVERLAY:
>>> +			sprite = plane->plane;
>>> +			for (level = 0; level < wm_state->num_levels; level++)
>>> +				wm_state->sr[level].plane =
>>> +					min(wm_state->sr[level].plane,
>>> +					    wm_state->wm[level].sprite[sprite]);
>>> +			break;
>>> +		}
>>> +	}
>>> +
>>> +	/* clear any (partially) filled invalid levels */
>>> +	for (level = wm_state->num_levels; level < CHV_WM_NUM_LEVELS; level++) {
>>> +		memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
>>> +		memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
>>> +	}
>>> +
>>> +	vlv_invert_wms(crtc);
>>> +}
>>> +
>>> +static void vlv_merge_wm(struct drm_device *dev,
>>> +			 struct vlv_wm_values *wm)
>>> +{
>>> +	struct intel_crtc *crtc;
>>> +	int num_active_crtcs = 0;
>>> +
>>> +	if (IS_CHERRYVIEW(dev))
>>> +		wm->level = VLV_WM_LEVEL_DDR_DVFS;
>>> +	else
>>> +		wm->level = VLV_WM_LEVEL_PM2;
>>> +	wm->cxsr = true;
>>> +
>>> +	for_each_intel_crtc(dev, crtc) {
>>> +		const struct vlv_wm_state *wm_state = &crtc->wm_state;
>>> +
>>> +		if (!crtc->active)
>>> +			continue;
>>> +
>>> +		if (!wm_state->cxsr)
>>> +			wm->cxsr = false;
>>> +
>>> +		num_active_crtcs++;
>>> +		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
>>> +	}
>>> +
>>> +	if (num_active_crtcs != 1)
>>> +		wm->cxsr = false;
>>> +
>>> +	for_each_intel_crtc(dev, crtc) {
>>> +		struct vlv_wm_state *wm_state = &crtc->wm_state;
>>> +		enum pipe pipe = crtc->pipe;
>>> +
>>> +		if (!crtc->active)
>>> +			continue;
>>> +
>>> +		wm->pipe[pipe] = wm_state->wm[wm->level];
>>> +		if (wm->cxsr)
>>> +			wm->sr = wm_state->sr[wm->level];
>>> +
>>> +		wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
>>> +		wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
>>> +		wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
>>> +		wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
>>
>> Did we really decide that 0x2 was the final correct value for the DL
>> registers? I figured we would be running 0x7 for CHV.
>
> I'm wary of increasing it too much. That'll give the system agent a
> better chance of making a mess of things.
>

It does appear to work at a level of 2 and gives us room for improvement 
if there is a performance problem in the furture

>>
>>> +	}
>>> +}
>>> +
>>> +static void vlv_update_wm(struct drm_crtc *crtc)
>>> +{
>>> +	struct drm_device *dev = crtc->dev;
>>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>>> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>>> +	enum pipe pipe = intel_crtc->pipe;
>>> +	struct vlv_wm_values wm = {};
>>> +
>>> +	_vlv_compute_wm(intel_crtc);
>>> +	vlv_merge_wm(dev, &wm);
>>> +
>>> +	if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0)
>>> +		return;
>>> +
>>> +	if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
>>> +	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
>>> +		chv_set_memory_dvfs(dev_priv, false);
>>> +
>>> +	if (wm.level < VLV_WM_LEVEL_PM5 &&
>>> +	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
>>> +		chv_set_memory_pm5(dev_priv, false);
>>> +
>>> +	if (!wm.cxsr && dev_priv->wm.vlv.cxsr) {
>>> +		intel_set_memory_cxsr(dev_priv, false);
>>> +		intel_wait_for_vblank(dev, pipe);
>>> +	}
>>> +
>>> +	vlv_write_wm_values(intel_crtc, &wm);
>>> +
>>> +	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
>>> +		      "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
>>> +		      pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
>>> +		      wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
>>> +		      wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
>>> +
>>
>> Love the detailed DRM_DEBUG_KMS - of course its showing the only real
>> issue I have found in the series that the cursor watermark is being
>> programmed inverted. 63 when the cursor is disabled and 0 when its
>> enabled. This leads to an SRR% increase of ~10% when the cursor is
>> enabled.
>>
>> // Cursor disabled
>> [ 2662.355218] [drm:vlv_update_wm] Setting FIFO watermarks - A:
>> plane=340, cursor=63, sprite0=0, sprite1=0, SR: plane=1364, cursor=0
>> level=2 cxsr=1
>>
>> // Cursor Enabled
>> [ 2667.531049] [drm:vlv_update_wm] Setting FIFO watermarks - A:
>> plane=340, cursor=0, sprite0=0, sprite1=0, SR: plane=1364, cursor=0
>> level=2 cxsr=1
>>
>> I haven't found the line of code that actually inverts this programming.
>> I will continue to investigate during testing of this series.
>
> vlv_invert_wms()
>
> That's done on purpose since calculating the watermarks the "right way
> up" first makes makes it easier to think about them (ie. to compare with
> the FIFO size, and to merge the plane SR watermark). It also make things
> looks more ILK-like since ILK+ take the watermarks in the non-inverted
> form.
>

I wasn't aware the registers were inverted compared to ILK+. This is the 
only real issue I had with the series and after testing here by writing 
63 into the register I agree that this isn't really a problem.

Reviewed-by: Clint Taylor <Clinton.A.Taylor@intel.com>
Tested-by: Clint Taylor <Clinton.A.Taylor@intel.com>

>>
>>
>>> +	if (wm.cxsr && !dev_priv->wm.vlv.cxsr) {
>>> +		intel_wait_for_vblank(dev, pipe);
>>> +		intel_set_memory_cxsr(dev_priv, true);
>>> +	}
>>> +
>>> +	if (wm.level >= VLV_WM_LEVEL_PM5 &&
>>> +	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
>>> +		chv_set_memory_pm5(dev_priv, true);
>>> +
>>> +	if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
>>> +	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
>>> +		chv_set_memory_dvfs(dev_priv, true);
>>> +
>>> +	dev_priv->wm.vlv = wm;
>>>    }
>>>
>>>    static void valleyview_update_sprite_wm(struct drm_plane *plane,
>>> @@ -6823,8 +7128,9 @@ void intel_init_pm(struct drm_device *dev)
>>>    		else if (INTEL_INFO(dev)->gen == 8)
>>>    			dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
>>>    	} else if (IS_CHERRYVIEW(dev)) {
>>> -		dev_priv->display.update_wm = valleyview_update_wm;
>>> -		dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
>>> +		vlv_setup_wm_latency(dev);
>>> +
>>> +		dev_priv->display.update_wm = vlv_update_wm;
>>>    		dev_priv->display.init_clock_gating =
>>>    			cherryview_init_clock_gating;
>>>    	} else if (IS_VALLEYVIEW(dev)) {
>>>
>
Jani Nikula June 29, 2015, 8:03 a.m. UTC | #4
On Fri, 26 Jun 2015, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> On Fri, Jun 26, 2015 at 10:56:33AM -0700, Clint Taylor wrote:
>> On 06/24/2015 12:00 PM, ville.syrjala@linux.intel.com wrote:
>> > +	if (IS_CHERRYVIEW(dev_priv)) {
>> > +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
>> > +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
>> 
>> nit #defines for these magic values please
>
> What's the point of doing that? These values are not repeated anywhere
> else.

Documentation.

BR,
Jani.
Daniel Vetter June 29, 2015, 8:54 a.m. UTC | #5
On Mon, Jun 29, 2015 at 11:03:04AM +0300, Jani Nikula wrote:
> On Fri, 26 Jun 2015, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> > On Fri, Jun 26, 2015 at 10:56:33AM -0700, Clint Taylor wrote:
> >> On 06/24/2015 12:00 PM, ville.syrjala@linux.intel.com wrote:
> >> > +	if (IS_CHERRYVIEW(dev_priv)) {
> >> > +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
> >> > +		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
> >> 
> >> nit #defines for these magic values please
> >
> > What's the point of doing that? These values are not repeated anywhere
> > else.
> 
> Documentation.

I've seend the original watermark code which did this for the massive mess
that where gen2/3/4 wm code. It was unreadable, unreviewable and because
of that had bugs. I concur with Ville here.

What we might want to do is a macro to do the "logical wm setting" -> hw
value encoding, since there's some surprising differences there between
platforms. But imo that's better done as some large-scale overall project.
-Daniel

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 514adcf..37cc653 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -276,6 +276,12 @@  struct i915_hotplug {
 			    &dev->mode_config.plane_list,	\
 			    base.head)
 
+#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane)	\
+	list_for_each_entry(intel_plane,				\
+			    &(dev)->mode_config.plane_list,		\
+			    base.head)					\
+		if ((intel_plane)->pipe == (intel_crtc)->pipe)
+
 #define for_each_intel_crtc(dev, intel_crtc) \
 	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head)
 
@@ -1498,18 +1504,20 @@  struct ilk_wm_values {
 	enum intel_ddb_partitioning partitioning;
 };
 
-struct vlv_wm_values {
-	struct {
-		uint16_t primary;
-		uint16_t sprite[2];
-		uint8_t cursor;
-	} pipe[3];
+struct vlv_pipe_wm {
+	uint16_t primary;
+	uint16_t sprite[2];
+	uint8_t cursor;
+};
 
-	struct {
-		uint16_t plane;
-		uint8_t cursor;
-	} sr;
+struct vlv_sr_wm {
+	uint16_t plane;
+	uint8_t cursor;
+};
 
+struct vlv_wm_values {
+	struct vlv_pipe_wm pipe[3];
+	struct vlv_sr_wm sr;
 	struct {
 		uint8_t cursor;
 		uint8_t sprite[2];
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b15d57f..1424320 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4690,8 +4690,11 @@  intel_pre_disable_primary(struct drm_crtc *crtc)
 	 * event which is after the vblank start event, so we need to have a
 	 * wait-for-vblank between disabling the plane and the pipe.
 	 */
-	if (HAS_GMCH_DISPLAY(dev))
+	if (HAS_GMCH_DISPLAY(dev)) {
 		intel_set_memory_cxsr(dev_priv, false);
+		dev_priv->wm.vlv.cxsr = false;
+		intel_wait_for_vblank(dev, pipe);
+	}
 
 	/*
 	 * FIXME IPS should be fine as long as one plane is
@@ -6005,7 +6008,6 @@  static void valleyview_crtc_enable(struct drm_crtc *crtc)
 
 	intel_crtc_load_lut(crtc);
 
-	intel_update_watermarks(crtc);
 	intel_enable_pipe(intel_crtc);
 
 	assert_vblank_disabled(crtc);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 3673a71..f26a680 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -462,6 +462,15 @@  struct intel_crtc_state {
 	enum pipe hsw_workaround_pipe;
 };
 
+struct vlv_wm_state {
+	struct vlv_pipe_wm wm[3];
+	struct vlv_sr_wm sr[3];
+	uint8_t num_active_planes;
+	uint8_t num_levels;
+	uint8_t level;
+	bool cxsr;
+};
+
 struct intel_pipe_wm {
 	struct intel_wm_level wm[5];
 	uint32_t linetime;
@@ -564,6 +573,8 @@  struct intel_crtc {
 
 	/* scalers available on this crtc */
 	int num_scalers;
+
+	struct vlv_wm_state wm_state;
 };
 
 struct intel_plane_wm_parameters {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e67548d..d046e5f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -335,8 +335,6 @@  void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
 	if (IS_VALLEYVIEW(dev)) {
 		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
 		POSTING_READ(FW_BLC_SELF_VLV);
-		if (IS_CHERRYVIEW(dev))
-			chv_set_memory_pm5(dev_priv, enable);
 	} else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
 		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
 		POSTING_READ(FW_BLC_SELF);
@@ -929,8 +927,6 @@  static void vlv_write_wm_values(struct intel_crtc *crtc,
 	}
 
 	POSTING_READ(DSPFW1);
-
-	dev_priv->wm.vlv = *wm;
 }
 
 #undef FW_WM_VLV
@@ -1014,6 +1010,72 @@  enum vlv_wm_level {
 	VLV_WM_NUM_LEVELS = 1,
 };
 
+/* latency must be in 0.1us units. */
+static unsigned int vlv_wm_method2(unsigned int pixel_rate,
+				   unsigned int pipe_htotal,
+				   unsigned int horiz_pixels,
+				   unsigned int bytes_per_pixel,
+				   unsigned int latency)
+{
+	unsigned int ret;
+
+	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
+	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
+	ret = DIV_ROUND_UP(ret, 64);
+
+	return ret;
+}
+
+static void vlv_setup_wm_latency(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* all latencies in usec */
+	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
+
+	if (IS_CHERRYVIEW(dev_priv)) {
+		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
+		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
+	}
+}
+
+static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
+				     struct intel_crtc *crtc,
+				     const struct intel_plane_state *state,
+				     int level)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	int clock, htotal, pixel_size, width, wm;
+
+	if (dev_priv->wm.pri_latency[level] == 0)
+		return USHRT_MAX;
+
+	if (!state->visible)
+		return 0;
+
+	pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
+	clock = crtc->config->base.adjusted_mode.crtc_clock;
+	htotal = crtc->config->base.adjusted_mode.crtc_htotal;
+	width = crtc->config->pipe_src_w;
+	if (WARN_ON(htotal == 0))
+		htotal = 1;
+
+	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
+		/*
+		 * FIXME the formula gives values that are
+		 * too big for the cursor FIFO, and hence we
+		 * would never be able to use cursors. For
+		 * now just hardcode the watermark.
+		 */
+		wm = 63;
+	} else {
+		wm = vlv_wm_method2(clock, htotal, width, pixel_size,
+				    dev_priv->wm.pri_latency[level] * 10);
+	}
+
+	return min_t(int, wm, USHRT_MAX);
+}
+
 static bool vlv_compute_sr_wm(struct drm_device *dev,
 			      struct vlv_wm_values *wm)
 {
@@ -1105,6 +1167,249 @@  static void valleyview_update_wm(struct drm_crtc *crtc)
 
 	if (cxsr_enabled)
 		intel_set_memory_cxsr(dev_priv, true);
+
+	dev_priv->wm.vlv = wm;
+}
+
+static void vlv_invert_wms(struct intel_crtc *crtc)
+{
+	struct vlv_wm_state *wm_state = &crtc->wm_state;
+	int level;
+
+	for (level = 0; level < wm_state->num_levels; level++) {
+		struct drm_device *dev = crtc->base.dev;
+		const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
+		struct intel_plane *plane;
+
+		wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane;
+		wm_state->sr[level].cursor = 63 - wm_state->sr[level].cursor;
+
+		for_each_intel_plane_on_crtc(dev, crtc, plane) {
+			switch (plane->base.type) {
+				int sprite;
+			case DRM_PLANE_TYPE_CURSOR:
+				wm_state->wm[level].cursor = plane->wm.fifo_size -
+					wm_state->wm[level].cursor;
+				break;
+			case DRM_PLANE_TYPE_PRIMARY:
+				wm_state->wm[level].primary = plane->wm.fifo_size -
+					wm_state->wm[level].primary;
+				break;
+			case DRM_PLANE_TYPE_OVERLAY:
+				sprite = plane->plane;
+				wm_state->wm[level].sprite[sprite] = plane->wm.fifo_size -
+					wm_state->wm[level].sprite[sprite];
+				break;
+			}
+		}
+	}
+}
+
+static void _vlv_compute_wm(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct vlv_wm_state *wm_state = &crtc->wm_state;
+	struct intel_plane *plane;
+	int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1;
+	int level;
+
+	memset(wm_state, 0, sizeof(*wm_state));
+
+	wm_state->cxsr = crtc->pipe != PIPE_C;
+	if (IS_CHERRYVIEW(dev))
+		wm_state->num_levels = CHV_WM_NUM_LEVELS;
+	else
+		wm_state->num_levels = VLV_WM_NUM_LEVELS;
+
+	wm_state->num_active_planes = 0;
+	for_each_intel_plane_on_crtc(dev, crtc, plane) {
+		struct intel_plane_state *state =
+			to_intel_plane_state(plane->base.state);
+
+		if (plane->base.type == DRM_PLANE_TYPE_CURSOR)
+			continue;
+
+		if (state->visible)
+			wm_state->num_active_planes++;
+	}
+
+	if (wm_state->num_active_planes != 1)
+		wm_state->cxsr = false;
+
+	if (wm_state->cxsr) {
+		for (level = 0; level < wm_state->num_levels; level++) {
+			wm_state->sr[level].plane = sr_fifo_size;
+			wm_state->sr[level].cursor = 63;
+		}
+	}
+
+	for_each_intel_plane_on_crtc(dev, crtc, plane) {
+		struct intel_plane_state *state =
+			to_intel_plane_state(plane->base.state);
+
+		if (!state->visible)
+			continue;
+
+		/* normal watermarks */
+		for (level = 0; level < wm_state->num_levels; level++) {
+			int wm = vlv_compute_wm_level(plane, crtc, state, level);
+			int max_wm = plane->base.type == DRM_PLANE_TYPE_CURSOR ? 63 : 511;
+
+			/* hack */
+			if (WARN_ON(level == 0 && wm > max_wm))
+				wm = max_wm;
+
+			if (wm > plane->wm.fifo_size)
+				break;
+
+			switch (plane->base.type) {
+				int sprite;
+			case DRM_PLANE_TYPE_CURSOR:
+				wm_state->wm[level].cursor = wm;
+				break;
+			case DRM_PLANE_TYPE_PRIMARY:
+				wm_state->wm[level].primary = wm;
+				break;
+			case DRM_PLANE_TYPE_OVERLAY:
+				sprite = plane->plane;
+				wm_state->wm[level].sprite[sprite] = wm;
+				break;
+			}
+		}
+
+		wm_state->num_levels = level;
+
+		if (!wm_state->cxsr)
+			continue;
+
+		/* maxfifo watermarks */
+		switch (plane->base.type) {
+			int sprite, level;
+		case DRM_PLANE_TYPE_CURSOR:
+			for (level = 0; level < wm_state->num_levels; level++)
+				wm_state->sr[level].cursor =
+					wm_state->sr[level].cursor;
+			break;
+		case DRM_PLANE_TYPE_PRIMARY:
+			for (level = 0; level < wm_state->num_levels; level++)
+				wm_state->sr[level].plane =
+					min(wm_state->sr[level].plane,
+					    wm_state->wm[level].primary);
+			break;
+		case DRM_PLANE_TYPE_OVERLAY:
+			sprite = plane->plane;
+			for (level = 0; level < wm_state->num_levels; level++)
+				wm_state->sr[level].plane =
+					min(wm_state->sr[level].plane,
+					    wm_state->wm[level].sprite[sprite]);
+			break;
+		}
+	}
+
+	/* clear any (partially) filled invalid levels */
+	for (level = wm_state->num_levels; level < CHV_WM_NUM_LEVELS; level++) {
+		memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level]));
+		memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level]));
+	}
+
+	vlv_invert_wms(crtc);
+}
+
+static void vlv_merge_wm(struct drm_device *dev,
+			 struct vlv_wm_values *wm)
+{
+	struct intel_crtc *crtc;
+	int num_active_crtcs = 0;
+
+	if (IS_CHERRYVIEW(dev))
+		wm->level = VLV_WM_LEVEL_DDR_DVFS;
+	else
+		wm->level = VLV_WM_LEVEL_PM2;
+	wm->cxsr = true;
+
+	for_each_intel_crtc(dev, crtc) {
+		const struct vlv_wm_state *wm_state = &crtc->wm_state;
+
+		if (!crtc->active)
+			continue;
+
+		if (!wm_state->cxsr)
+			wm->cxsr = false;
+
+		num_active_crtcs++;
+		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
+	}
+
+	if (num_active_crtcs != 1)
+		wm->cxsr = false;
+
+	for_each_intel_crtc(dev, crtc) {
+		struct vlv_wm_state *wm_state = &crtc->wm_state;
+		enum pipe pipe = crtc->pipe;
+
+		if (!crtc->active)
+			continue;
+
+		wm->pipe[pipe] = wm_state->wm[wm->level];
+		if (wm->cxsr)
+			wm->sr = wm_state->sr[wm->level];
+
+		wm->ddl[pipe].primary = DDL_PRECISION_HIGH | 2;
+		wm->ddl[pipe].sprite[0] = DDL_PRECISION_HIGH | 2;
+		wm->ddl[pipe].sprite[1] = DDL_PRECISION_HIGH | 2;
+		wm->ddl[pipe].cursor = DDL_PRECISION_HIGH | 2;
+	}
+}
+
+static void vlv_update_wm(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	enum pipe pipe = intel_crtc->pipe;
+	struct vlv_wm_values wm = {};
+
+	_vlv_compute_wm(intel_crtc);
+	vlv_merge_wm(dev, &wm);
+
+	if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0)
+		return;
+
+	if (wm.level < VLV_WM_LEVEL_DDR_DVFS &&
+	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_DDR_DVFS)
+		chv_set_memory_dvfs(dev_priv, false);
+
+	if (wm.level < VLV_WM_LEVEL_PM5 &&
+	    dev_priv->wm.vlv.level >= VLV_WM_LEVEL_PM5)
+		chv_set_memory_pm5(dev_priv, false);
+
+	if (!wm.cxsr && dev_priv->wm.vlv.cxsr) {
+		intel_set_memory_cxsr(dev_priv, false);
+		intel_wait_for_vblank(dev, pipe);
+	}
+
+	vlv_write_wm_values(intel_crtc, &wm);
+
+	DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, "
+		      "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n",
+		      pipe_name(pipe), wm.pipe[pipe].primary, wm.pipe[pipe].cursor,
+		      wm.pipe[pipe].sprite[0], wm.pipe[pipe].sprite[1],
+		      wm.sr.plane, wm.sr.cursor, wm.level, wm.cxsr);
+
+	if (wm.cxsr && !dev_priv->wm.vlv.cxsr) {
+		intel_wait_for_vblank(dev, pipe);
+		intel_set_memory_cxsr(dev_priv, true);
+	}
+
+	if (wm.level >= VLV_WM_LEVEL_PM5 &&
+	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_PM5)
+		chv_set_memory_pm5(dev_priv, true);
+
+	if (wm.level >= VLV_WM_LEVEL_DDR_DVFS &&
+	    dev_priv->wm.vlv.level < VLV_WM_LEVEL_DDR_DVFS)
+		chv_set_memory_dvfs(dev_priv, true);
+
+	dev_priv->wm.vlv = wm;
 }
 
 static void valleyview_update_sprite_wm(struct drm_plane *plane,
@@ -6823,8 +7128,9 @@  void intel_init_pm(struct drm_device *dev)
 		else if (INTEL_INFO(dev)->gen == 8)
 			dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
 	} else if (IS_CHERRYVIEW(dev)) {
-		dev_priv->display.update_wm = valleyview_update_wm;
-		dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
+		vlv_setup_wm_latency(dev);
+
+		dev_priv->display.update_wm = vlv_update_wm;
 		dev_priv->display.init_clock_gating =
 			cherryview_init_clock_gating;
 	} else if (IS_VALLEYVIEW(dev)) {