
[v7,8/8] drm/i915/gen9: WM memory bandwidth related workaround

Message ID 20161201154940.24446-9-mahesh1.kumar@intel.com (mailing list archive)
State New, archived

Commit Message

Kumar, Mahesh Dec. 1, 2016, 3:49 p.m. UTC
This patch implements workarounds related to display arbitrated memory
bandwidth. These WAs are applicable to all gen-9 based platforms.

Changes since v1:
 - Rebase on top of Paulo's patch series
Changes since v2:
 - Address review comments
 - Rebase/rework as per other patch changes in series
Changes since v3:
 - Rework based on Maarten's comments

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h      |  11 +++
 drivers/gpu/drm/i915/intel_display.c |  24 ++++++
 drivers/gpu/drm/i915/intel_pm.c      | 155 +++++++++++++++++++++++++++++++++--
 3 files changed, 181 insertions(+), 9 deletions(-)

Comments

Zanoni, Paulo R Dec. 15, 2016, 5:07 p.m. UTC | #1
On Thu, 2016-12-01 at 21:19 +0530, Mahesh Kumar wrote:
> This patch implemnets Workariunds related to display arbitrated
> memory
> bandwidth. These WA are applicabe for all gen-9 based platforms.

3 typos above.

The WA is already implemented. What the patch does is opt out of the
WA when we conclude it's safe. Please say this in the commit message.

> 
> Changes since v1:
>  - Rebase on top of Paulo's patch series
> Changes since v2:
>  - Address review comments
>  - Rebase/rework as per other patch changes in series
> Changes since v3:
>  - Rework based on Maarten's comments
> 
> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h      |  11 +++
>  drivers/gpu/drm/i915/intel_display.c |  24 ++++++
>  drivers/gpu/drm/i915/intel_pm.c      | 155
> +++++++++++++++++++++++++++++++++--
>  3 files changed, 181 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 69213a4..3126259 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1216,6 +1216,13 @@ enum intel_sbi_destination {
>  	SBI_MPHY,
>  };
>  
> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> +enum watermark_memory_wa {
> +	WATERMARK_WA_NONE,
> +	WATERMARK_WA_X_TILED,
> +	WATERMARK_WA_Y_TILED,
> +};
> +
>  #define QUIRK_PIPEA_FORCE (1<<0)
>  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> @@ -1788,6 +1795,10 @@ struct skl_ddb_allocation {
>  
>  struct skl_wm_values {
>  	unsigned dirty_pipes;
> +	/* any WaterMark memory workaround Required */

CapitaliZation is weird Here. Besides, no need for the comment.


> +	enum watermark_memory_wa mem_wa;
> +	uint32_t pipe_bw_kbps[I915_MAX_PIPES];
> +	bool pipe_ytiled[I915_MAX_PIPES];
>  	struct skl_ddb_allocation ddb;
>  };
>  
> diff --git a/drivers/gpu/drm/i915/intel_display.c
> b/drivers/gpu/drm/i915/intel_display.c
> index 5d11002..f8da488 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -14574,6 +14574,28 @@ static void intel_atomic_track_fbs(struct
> drm_atomic_state *state)
>  				  to_intel_plane(plane)-
> >frontbuffer_bit);
>  }
>  
> +static void
> +intel_update_wm_bw_wa(struct drm_atomic_state *state)
> +{
> +	struct intel_atomic_state *intel_state =
> to_intel_atomic_state(state);
> +	struct drm_i915_private *dev_priv = to_i915(state->dev);
> +	const struct drm_crtc *crtc;
> +	const struct drm_crtc_state *cstate;
> +	struct skl_wm_values *results = &intel_state->wm_results;
> +	struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
> +	int i;
> +
> +	if (!IS_GEN9(dev_priv))
> +		return;
> +
> +	for_each_crtc_in_state(state, crtc, cstate, i) {
> +		hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i];
> +		hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i];
> +	}
> +
> +	hw_vals->mem_wa = results->mem_wa;
> +}

I think we can just patch skl_copy_wm_for_pipe() to also copy the
fields we need instead of adding this function. Wouldn't that be much
simpler? Anyway, this one looks correct, so no need to change it if you
don't want to.
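
For reference, a rough sketch of that alternative, assuming skl_copy_wm_for_pipe()
keeps its current dst/src/pipe arguments and that the per-pipe arrays can be
indexed by pipe here (just the idea, not the final code):

static void
skl_copy_wm_for_pipe(struct skl_wm_values *dst,
		     struct skl_wm_values *src,
		     enum pipe pipe)
{
	/* existing per-pipe ddb/wm copies stay as they are */

	dst->pipe_bw_kbps[pipe] = src->pipe_bw_kbps[pipe];
	dst->pipe_ytiled[pipe] = src->pipe_ytiled[pipe];
	dst->mem_wa = src->mem_wa;
}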


> +
>  /**
>   * intel_atomic_commit - commit validated state object
>   * @dev: DRM device
> @@ -14614,6 +14636,8 @@ static int intel_atomic_commit(struct
> drm_device *dev,
>  	intel_shared_dpll_commit(state);
>  	intel_atomic_track_fbs(state);
>  
> +	intel_update_wm_bw_wa(state);
> +
>  	if (intel_state->modeset) {
>  		memcpy(dev_priv->min_pixclk, intel_state-
> >min_pixclk,
>  		       sizeof(intel_state->min_pixclk));
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index cc8fc84..fda6e1e 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2878,11 +2878,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>  
>  #define SKL_SAGV_BLOCK_TIME	30 /* µs */
>  
> -/*
> - * FIXME: We still don't have the proper code detect if we need to
> apply the WA,
> - * so assume we'll always need it in order to avoid underruns.
> - */
> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state
> *state)

Why are we renaming this function?

Also, in my last review I suggested that we kill this function. I don't
think it makes sense to have this function anymore.

Besides, function intel_can_enable_sagv() needs to look at the value we
assigned to enum watermark_memory_wa, not get a true/false based on
platform version.
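
A minimal sketch of what that could look like in intel_can_enable_sagv(),
assuming intel_state->wm_results.mem_wa has already been computed by the time
SAGV is evaluated:

		if (intel_state->wm_results.mem_wa != WATERMARK_WA_NONE &&
		    plane->base.state->fb->modifier == I915_FORMAT_MOD_X_TILED)
			latency += 15;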


>  {
>  	struct drm_i915_private *dev_priv = to_i915(state-
> >base.dev);
>  
> @@ -3056,7 +3052,7 @@ bool intel_can_enable_sagv(struct
> drm_atomic_state *state)
>  
>  		latency = dev_priv->wm.skl_latency[level];
>  
> -		if (skl_needs_memory_bw_wa(intel_state) &&
> +		if (intel_needs_memory_bw_wa(intel_state) &&
>  		    plane->base.state->fb->modifier ==
>  		    I915_FORMAT_MOD_X_TILED)
>  			latency += 15;
> @@ -3584,7 +3580,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  	uint32_t y_min_scanlines;
>  	struct intel_atomic_state *state =
>  		to_intel_atomic_state(cstate->base.state);
> -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
> +	enum watermark_memory_wa mem_wa;
>  	bool y_tiled, x_tiled;
>  
>  	if (latency == 0 || !cstate->base.active || !intel_pstate-
> >base.visible) {
> @@ -3600,7 +3596,8 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  	if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
>  		latency += 4;
>  
> -	if (apply_memory_bw_wa && x_tiled)
> +	mem_wa = state->wm_results.mem_wa;

This assignment could have been done during declaration.
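
i.e. something like this (it works because `state` is initialized earlier in
the same declaration list):

	enum watermark_memory_wa mem_wa = state->wm_results.mem_wa;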


> +	if (mem_wa != WATERMARK_WA_NONE && x_tiled)
>  		latency += 15;
>  
>  	width = drm_rect_width(&intel_pstate->base.src) >> 16;
> @@ -3635,7 +3632,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  		y_min_scanlines = 4;
>  	}
>  
> -	if (apply_memory_bw_wa)
> +	if (mem_wa == WATERMARK_WA_Y_TILED)
>  		y_min_scanlines *= 2;
>  
>  	plane_bytes_per_line = width * cpp;
> @@ -4063,6 +4060,15 @@ skl_compute_ddb(struct drm_atomic_state
> *state)
>  	}
>  
>  	/*
> +	 * If Watermark workaround is changed we need to recalculate
> +	 * watermark values for all active pipes
> +	 */
> +	if (intel_state->wm_results.mem_wa != dev_priv-
> >wm.skl_hw.mem_wa) {
> +		realloc_pipes = ~0;
> +		intel_state->wm_results.dirty_pipes = ~0;
> +	}

But then skl_ddb_add_affected_planes() only actually adds to the commit
the planes that have different DDB partitioning. It seems to me that it
may be possible to have a case where the WA changes and the DDB stays
the same, so we need to find a way to include every CRTC there. Maybe
we could just go and call the function that adds every CRTC here.
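
One way to do that, sketched with the generic atomic helpers (assuming `dev` is
the drm_device pointer already available in skl_compute_ddb(); not necessarily
how the final patch should look):

	if (intel_state->wm_results.mem_wa != dev_priv->wm.skl_hw.mem_wa) {
		struct drm_crtc *crtc;

		/* pull every CRTC into the state so its WM gets recomputed */
		drm_for_each_crtc(crtc, dev) {
			struct drm_crtc_state *cs;

			cs = drm_atomic_get_crtc_state(state, crtc);
			if (IS_ERR(cs))
				return PTR_ERR(cs);
		}
		intel_state->wm_results.dirty_pipes = ~0;
	}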


> +
> +	/*
>  	 * We're not recomputing for the pipes not included in the
> commit, so
>  	 * make sure we start with the current state.
>  	 */
> @@ -4087,6 +4093,133 @@ skl_compute_ddb(struct drm_atomic_state
> *state)
>  	return 0;
>  }
>  
> +static int
> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
> +{
> +	struct drm_device *dev = state->dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct intel_atomic_state *intel_state =
> to_intel_atomic_state(state);
> +	struct memdev_info *memdev_info = &dev_priv->memdev_info;
> +	struct skl_wm_values *results = &intel_state->wm_results;
> +	struct drm_crtc *crtc;
> +	struct drm_crtc_state *cstate;
> +	int active_pipes = 0;
> +	uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps;
> +	int display_bw_percentage;
> +	bool y_tile_enabled = false;
> +	int ret, i;
> +
> +	if (!intel_needs_memory_bw_wa(intel_state)) {
> +		results->mem_wa = WATERMARK_WA_NONE;
> +		return 0;
> +	}
> +
> +	if (!memdev_info->valid)
> +		goto exit;

There's no reason to use a label if it's only called here. Just move
all that code to the if statement.
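
i.e. the early-out could become simply:

	if (!memdev_info->valid) {
		results->mem_wa = WATERMARK_WA_Y_TILED;
		return 0;
	}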


> +
> +	ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
> +			state->acquire_ctx);
> +	if (ret)
> +		return ret;

Why are we getting this here? Why this lock specifically? What's it
protecting?

Doc says:
- ww mutex protecting connector state and routing
- protects connector->encoder and encoder->crtc links 

We're not touching any of that here.


> +
> +	memcpy(results->pipe_bw_kbps, dev_priv-
> >wm.skl_hw.pipe_bw_kbps,
> +			sizeof(results->pipe_bw_kbps));
> +	memcpy(results->pipe_ytiled, dev_priv-
> >wm.skl_hw.pipe_ytiled,
> +			sizeof(results->pipe_ytiled));
> +
> +	for_each_crtc_in_state(state, crtc, cstate, i) {
> +		struct drm_plane *plane;
> +		const struct drm_plane_state *pstate;
> +		int active_planes = 0;
> +		uint32_t max_plane_bw_kbps = 0;
> +
> +		results->pipe_ytiled[i] = false;
> +
> +		if (!cstate->active) {
> +			results->pipe_bw_kbps[i] = 0;
> +			continue;
> +		}
> +
> +		drm_atomic_crtc_state_for_each_plane_state(plane,
> pstate,
> +								csta
> te) {
> +			struct drm_framebuffer *fb;
> +			uint32_t plane_bw_kbps;
> +			enum plane_id id = to_intel_plane(plane)-
> >id;
> +
> +			if (!pstate->visible)
> +				continue;
> +
> +			if (WARN_ON(!pstate->fb))
> +				continue;
> +
> +			if (id == PLANE_CURSOR)
> +				continue;
> +
> +			fb = pstate->fb;
> +			if ((fb->modifier == I915_FORMAT_MOD_Y_TILED
> ||
> +				fb->modifier ==
> I915_FORMAT_MOD_Yf_TILED))
> +				results->pipe_ytiled[i] = true;
> +
> +			plane_bw_kbps =
> skl_adjusted_plane_pixel_rate(
> +						to_intel_crtc_state(
> cstate),
> +						to_intel_plane_state
> (pstate));
> +			max_plane_bw_kbps = max(plane_bw_kbps,
> +							max_plane_bw
> _kbps);
> +			active_planes++;
> +		}
> +		results->pipe_bw_kbps[i] = max_plane_bw_kbps *
> active_planes;
> +	}
> +
> +	for_each_pipe(dev_priv, i) {
> +		if (results->pipe_bw_kbps[i]) {
> +			max_pipe_bw_kbps = max(max_pipe_bw_kbps,
> +					results->pipe_bw_kbps[i]);
> +			active_pipes++;
> +		}
> +		if (results->pipe_ytiled[i])
> +			y_tile_enabled = true;
> +	}
> +
> +	total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes;
> +	display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps
> * 100,
> +						memdev_info-
> >bandwidth_kbps);

Shouldn't this be just DIV_ROUND_UP()? I don't see 64 bit variables
here, nor the possibility of overflows.


> +
> +	/*
> +	 * If there is any Ytile plane enabled and arbitrated
> display
> +	 * bandwidth > 20% of raw system memory bandwidth
> +	 * Enale Y-tile related WA
> +	 *
> +	 * If memory is dual channel single rank, Xtile limit = 35%,
> else Xtile
> +	 * limit = 60%
> +	 * If there is no Ytile plane enabled and
> +	 * arbitrated display bandwidth > Xtile limit
> +	 * Enable X-tile realated WA
> +	 */
> +	if (y_tile_enabled && (display_bw_percentage > 20))
> +		results->mem_wa = WATERMARK_WA_Y_TILED;
> +	else {
> +		int x_tile_percentage = 60;
> +		enum rank rank = DRAM_RANK_SINGLE;
> +
> +		if (memdev_info->rank != DRAM_RANK_INVALID)
> +			rank = memdev_info->rank;

I really think that the previous patch should prevent this. If
memdev_info->rank is invalid then memdev_info->valid should also be
false and we'd have returned at the beginning of the function. With
that, this part of the code could be simplified a little bit since then
we'd have no need to choose a default rank.
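
With the rank guaranteed valid whenever memdev_info->valid is set, this block
could shrink to something like:

		if (memdev_info->rank == DRAM_RANK_SINGLE &&
		    memdev_info->num_channels == 2)
			x_tile_percentage = 35;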



> +
> +		if ((rank == DRAM_RANK_SINGLE) &&
> +					(memdev_info->num_channels
> == 2))
> +			x_tile_percentage = 35;
> +
> +		if (display_bw_percentage > x_tile_percentage)
> +			results->mem_wa = WATERMARK_WA_X_TILED;
> +	}
> +
> +	return 0;
> +
> +exit:
> +	results->mem_wa = WATERMARK_WA_Y_TILED;
> +	return 0;
> +}
> +
> +

Two blank lines here.


>  static void
>  skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>  		     struct skl_wm_values *src,
> @@ -4162,6 +4295,10 @@ skl_compute_wm(struct drm_atomic_state *state)
>  	/* Clear all dirty flags */
>  	results->dirty_pipes = 0;
>  
> +	ret = skl_compute_memory_bandwidth_wm_wa(state);
> +	if (ret)
> +		return ret;
> +
>  	ret = skl_compute_ddb(state);
>  	if (ret)
>  		return ret;
Kumar, Mahesh Feb. 15, 2017, 3 p.m. UTC | #2
Hi,


On Thursday 15 December 2016 10:37 PM, Paulo Zanoni wrote:
> On Thu, 2016-12-01 at 21:19 +0530, Mahesh Kumar wrote:
>> This patch implemnets Workariunds related to display arbitrated
>> memory
>> bandwidth. These WA are applicabe for all gen-9 based platforms.
> 3 typos above.
>
> The WA is already implemented. What the patch does is opt out of the
> WA when we conclude it's safe. Please say this in the commit message.
updated the commit msg
>> Changes since v1:
>>   - Rebase on top of Paulo's patch series
>> Changes since v2:
>>   - Address review comments
>>   - Rebase/rework as per other patch changes in series
>> Changes since v3:
>>   - Rework based on Maarten's comments
>>
>> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h      |  11 +++
>>   drivers/gpu/drm/i915/intel_display.c |  24 ++++++
>>   drivers/gpu/drm/i915/intel_pm.c      | 155
>> +++++++++++++++++++++++++++++++++--
>>   3 files changed, 181 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 69213a4..3126259 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1216,6 +1216,13 @@ enum intel_sbi_destination {
>>   	SBI_MPHY,
>>   };
>>   
>> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
>> +enum watermark_memory_wa {
>> +	WATERMARK_WA_NONE,
>> +	WATERMARK_WA_X_TILED,
>> +	WATERMARK_WA_Y_TILED,
>> +};
>> +
>>   #define QUIRK_PIPEA_FORCE (1<<0)
>>   #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>>   #define QUIRK_INVERT_BRIGHTNESS (1<<2)
>> @@ -1788,6 +1795,10 @@ struct skl_ddb_allocation {
>>   
>>   struct skl_wm_values {
>>   	unsigned dirty_pipes;
>> +	/* any WaterMark memory workaround Required */
> CapitaliZation is weird Here. Besides, no need for the comment.
>
>
>> +	enum watermark_memory_wa mem_wa;
>> +	uint32_t pipe_bw_kbps[I915_MAX_PIPES];
>> +	bool pipe_ytiled[I915_MAX_PIPES];
>>   	struct skl_ddb_allocation ddb;
>>   };
>>   
>> diff --git a/drivers/gpu/drm/i915/intel_display.c
>> b/drivers/gpu/drm/i915/intel_display.c
>> index 5d11002..f8da488 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -14574,6 +14574,28 @@ static void intel_atomic_track_fbs(struct
>> drm_atomic_state *state)
>>   				  to_intel_plane(plane)-
>>> frontbuffer_bit);
>>   }
>>   
>> +static void
>> +intel_update_wm_bw_wa(struct drm_atomic_state *state)
>> +{
>> +	struct intel_atomic_state *intel_state =
>> to_intel_atomic_state(state);
>> +	struct drm_i915_private *dev_priv = to_i915(state->dev);
>> +	const struct drm_crtc *crtc;
>> +	const struct drm_crtc_state *cstate;
>> +	struct skl_wm_values *results = &intel_state->wm_results;
>> +	struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
>> +	int i;
>> +
>> +	if (!IS_GEN9(dev_priv))
>> +		return;
>> +
>> +	for_each_crtc_in_state(state, crtc, cstate, i) {
>> +		hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i];
>> +		hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i];
>> +	}
>> +
>> +	hw_vals->mem_wa = results->mem_wa;
>> +}
> I think we can just patch skl_copy_wm_for_pipe() to also copy the
> fields we need instead of adding this function. Wouldn't that be much
> simpler? Anyway, this one looks correct, so no need to change it if you
> don't want to.
>
>
>> +
>>   /**
>>    * intel_atomic_commit - commit validated state object
>>    * @dev: DRM device
>> @@ -14614,6 +14636,8 @@ static int intel_atomic_commit(struct
>> drm_device *dev,
>>   	intel_shared_dpll_commit(state);
>>   	intel_atomic_track_fbs(state);
>>   
>> +	intel_update_wm_bw_wa(state);
>> +
>>   	if (intel_state->modeset) {
>>   		memcpy(dev_priv->min_pixclk, intel_state-
>>> min_pixclk,
>>   		       sizeof(intel_state->min_pixclk));
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>> b/drivers/gpu/drm/i915/intel_pm.c
>> index cc8fc84..fda6e1e 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -2878,11 +2878,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>>   
>>   #define SKL_SAGV_BLOCK_TIME	30 /* µs */
>>   
>> -/*
>> - * FIXME: We still don't have the proper code detect if we need to
>> apply the WA,
>> - * so assume we'll always need it in order to avoid underruns.
>> - */
>> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
>> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state
>> *state)
> Why are we renaming this function?
>
> Also, in my last review I suggested that we kill this function. I don't
> think it makes sense to have this function anymore.
>
> Besides, function intel_can_enable_sagv() needs to look at the value we
> assigned to enum watermark_memory_wa, not get a true/false based on
> platform version.
removed the function
>
>
>>   {
>>   	struct drm_i915_private *dev_priv = to_i915(state-
>>> base.dev);
>>   
>> @@ -3056,7 +3052,7 @@ bool intel_can_enable_sagv(struct
>> drm_atomic_state *state)
>>   
>>   		latency = dev_priv->wm.skl_latency[level];
>>   
>> -		if (skl_needs_memory_bw_wa(intel_state) &&
>> +		if (intel_needs_memory_bw_wa(intel_state) &&
>>   		    plane->base.state->fb->modifier ==
>>   		    I915_FORMAT_MOD_X_TILED)
>>   			latency += 15;
>> @@ -3584,7 +3580,7 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   	uint32_t y_min_scanlines;
>>   	struct intel_atomic_state *state =
>>   		to_intel_atomic_state(cstate->base.state);
>> -	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
>> +	enum watermark_memory_wa mem_wa;
>>   	bool y_tiled, x_tiled;
>>   
>>   	if (latency == 0 || !cstate->base.active || !intel_pstate-
>>> base.visible) {
>> @@ -3600,7 +3596,8 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   	if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
>>   		latency += 4;
>>   
>> -	if (apply_memory_bw_wa && x_tiled)
>> +	mem_wa = state->wm_results.mem_wa;
> This assignment could have been done during declaration.
done
>
>
>> +	if (mem_wa != WATERMARK_WA_NONE && x_tiled)
>>   		latency += 15;
>>   
>>   	width = drm_rect_width(&intel_pstate->base.src) >> 16;
>> @@ -3635,7 +3632,7 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   		y_min_scanlines = 4;
>>   	}
>>   
>> -	if (apply_memory_bw_wa)
>> +	if (mem_wa == WATERMARK_WA_Y_TILED)
>>   		y_min_scanlines *= 2;
>>   
>>   	plane_bytes_per_line = width * cpp;
>> @@ -4063,6 +4060,15 @@ skl_compute_ddb(struct drm_atomic_state
>> *state)
>>   	}
>>   
>>   	/*
>> +	 * If Watermark workaround is changed we need to recalculate
>> +	 * watermark values for all active pipes
>> +	 */
>> +	if (intel_state->wm_results.mem_wa != dev_priv-
>>> wm.skl_hw.mem_wa) {
>> +		realloc_pipes = ~0;
>> +		intel_state->wm_results.dirty_pipes = ~0;
>> +	}
> But then skl_ddb_add_affected_planes() only actually adds to the commit
> the planes that have different DDB partitioning. It seems to me that it
> may be possible to have a case where the WA changes and the DDB stays
> the same, so we need to find a way to include every CRTC there. Maybe
> we could just go and call the function that adds every CRTC here.
Modified this part of the code to calculate WM for all pipes if the WA is
changing, and to add a pipe only if its WM values are changing.
We are holding a ww_mutex, so this operation should be free of race conditions.
>
>> +
>> +	/*
>>   	 * We're not recomputing for the pipes not included in the
>> commit, so
>>   	 * make sure we start with the current state.
>>   	 */
>> @@ -4087,6 +4093,133 @@ skl_compute_ddb(struct drm_atomic_state
>> *state)
>>   	return 0;
>>   }
>>   
>> +static int
>> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
>> +{
>> +	struct drm_device *dev = state->dev;
>> +	struct drm_i915_private *dev_priv = to_i915(dev);
>> +	struct intel_atomic_state *intel_state =
>> to_intel_atomic_state(state);
>> +	struct memdev_info *memdev_info = &dev_priv->memdev_info;
>> +	struct skl_wm_values *results = &intel_state->wm_results;
>> +	struct drm_crtc *crtc;
>> +	struct drm_crtc_state *cstate;
>> +	int active_pipes = 0;
>> +	uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps;
>> +	int display_bw_percentage;
>> +	bool y_tile_enabled = false;
>> +	int ret, i;
>> +
>> +	if (!intel_needs_memory_bw_wa(intel_state)) {
>> +		results->mem_wa = WATERMARK_WA_NONE;
>> +		return 0;
>> +	}
>> +
>> +	if (!memdev_info->valid)
>> +		goto exit;
> There's no reason to use a label if it's only called here. Just move
> all that code to the if statement.
done
>
>
>> +
>> +	ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
>> +			state->acquire_ctx);
>> +	if (ret)
>> +		return ret;
> Why are we getting this here? Why this lock specifically? What's it
> protecting?
>
> Doc says:
> - ww mutex protecting connector state and routing
> - protects connector->encoder and encoder->crtc links
>
> We're not touching any of that here.
Created a new ww_mutex to serialize the WM-related calculation when it
may also affect other CRTCs.
I'm using that now instead of connection_mutex.
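
A rough sketch of that idea (the lock name and its placement are hypothetical;
the real lock is introduced in a later revision):

	/* hypothetical: a dedicated modeset lock for the WM WA state */
	struct drm_modeset_lock wm_wa_lock;

	/* in skl_compute_memory_bandwidth_wm_wa() */
	ret = drm_modeset_lock(&dev_priv->wm.wm_wa_lock, state->acquire_ctx);
	if (ret)
		return ret;
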
>
>
>> +
>> +	memcpy(results->pipe_bw_kbps, dev_priv-
>>> wm.skl_hw.pipe_bw_kbps,
>> +			sizeof(results->pipe_bw_kbps));
>> +	memcpy(results->pipe_ytiled, dev_priv-
>>> wm.skl_hw.pipe_ytiled,
>> +			sizeof(results->pipe_ytiled));
>> +
>> +	for_each_crtc_in_state(state, crtc, cstate, i) {
>> +		struct drm_plane *plane;
>> +		const struct drm_plane_state *pstate;
>> +		int active_planes = 0;
>> +		uint32_t max_plane_bw_kbps = 0;
>> +
>> +		results->pipe_ytiled[i] = false;
>> +
>> +		if (!cstate->active) {
>> +			results->pipe_bw_kbps[i] = 0;
>> +			continue;
>> +		}
>> +
>> +		drm_atomic_crtc_state_for_each_plane_state(plane,
>> pstate,
>> +								csta
>> te) {
>> +			struct drm_framebuffer *fb;
>> +			uint32_t plane_bw_kbps;
>> +			enum plane_id id = to_intel_plane(plane)-
>>> id;
>> +
>> +			if (!pstate->visible)
>> +				continue;
>> +
>> +			if (WARN_ON(!pstate->fb))
>> +				continue;
>> +
>> +			if (id == PLANE_CURSOR)
>> +				continue;
>> +
>> +			fb = pstate->fb;
>> +			if ((fb->modifier == I915_FORMAT_MOD_Y_TILED
>> ||
>> +				fb->modifier ==
>> I915_FORMAT_MOD_Yf_TILED))
>> +				results->pipe_ytiled[i] = true;
>> +
>> +			plane_bw_kbps =
>> skl_adjusted_plane_pixel_rate(
>> +						to_intel_crtc_state(
>> cstate),
>> +						to_intel_plane_state
>> (pstate));
>> +			max_plane_bw_kbps = max(plane_bw_kbps,
>> +							max_plane_bw
>> _kbps);
>> +			active_planes++;
>> +		}
>> +		results->pipe_bw_kbps[i] = max_plane_bw_kbps *
>> active_planes;
>> +	}
>> +
>> +	for_each_pipe(dev_priv, i) {
>> +		if (results->pipe_bw_kbps[i]) {
>> +			max_pipe_bw_kbps = max(max_pipe_bw_kbps,
>> +					results->pipe_bw_kbps[i]);
>> +			active_pipes++;
>> +		}
>> +		if (results->pipe_ytiled[i])
>> +			y_tile_enabled = true;
>> +	}
>> +
>> +	total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes;
>> +	display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps
>> * 100,
>> +						memdev_info-
>>> bandwidth_kbps);
> Shouldn't this be just DIV_ROUND_UP()? I don't see 64 bit variables
> here, nor the possibility of overflows.
If we have three 4K panels and each has 3 planes enabled with scaling, then
this value theoretically may go beyond 32 bits, so the variable is now cast
to 64 bit:
536MHz * 3 planes * 3 pipes * pipe_scaler_ratio * plane_scaler_ratio * 100
= 536000 * 3 * 3 * (1.9 * 1.9 (hscale & vscale)) * (1.9 * 1.9 (pipe hscale & vscale)) * 100
= 6286685040 = 0x1 76B7 3370
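
Roughly, the only change needed is the cast (a sketch, not the exact final hunk):

	display_bw_percentage = DIV_ROUND_UP_ULL((uint64_t)total_pipe_bw_kbps * 100,
						 memdev_info->bandwidth_kbps);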

>
>
>> +
>> +	/*
>> +	 * If there is any Ytile plane enabled and arbitrated
>> display
>> +	 * bandwidth > 20% of raw system memory bandwidth
>> +	 * Enale Y-tile related WA
>> +	 *
>> +	 * If memory is dual channel single rank, Xtile limit = 35%,
>> else Xtile
>> +	 * limit = 60%
>> +	 * If there is no Ytile plane enabled and
>> +	 * arbitrated display bandwidth > Xtile limit
>> +	 * Enable X-tile realated WA
>> +	 */
>> +	if (y_tile_enabled && (display_bw_percentage > 20))
>> +		results->mem_wa = WATERMARK_WA_Y_TILED;
>> +	else {
>> +		int x_tile_percentage = 60;
>> +		enum rank rank = DRAM_RANK_SINGLE;
>> +
>> +		if (memdev_info->rank != DRAM_RANK_INVALID)
>> +			rank = memdev_info->rank;
> I really think that the previous patch should prevent this. If
> memdev_info->rank is invalid then memdev_info->valid should also be
> false and we'd have returned at the beginning of the function. With
> that, this part of the code could be simplified a little bit since then
> we'd have no need to choose a default rank.
>
done

Regards,
-Mahesh
>
>> +
>> +		if ((rank == DRAM_RANK_SINGLE) &&
>> +					(memdev_info->num_channels
>> == 2))
>> +			x_tile_percentage = 35;
>> +
>> +		if (display_bw_percentage > x_tile_percentage)
>> +			results->mem_wa = WATERMARK_WA_X_TILED;
>> +	}
>> +
>> +	return 0;
>> +
>> +exit:
>> +	results->mem_wa = WATERMARK_WA_Y_TILED;
>> +	return 0;
>> +}
>> +
>> +
> Two blank lines here.
>
>
>>   static void
>>   skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>>   		     struct skl_wm_values *src,
>> @@ -4162,6 +4295,10 @@ skl_compute_wm(struct drm_atomic_state *state)
>>   	/* Clear all dirty flags */
>>   	results->dirty_pipes = 0;
>>   
>> +	ret = skl_compute_memory_bandwidth_wm_wa(state);
>> +	if (ret)
>> +		return ret;
>> +
>>   	ret = skl_compute_ddb(state);
>>   	if (ret)
>>   		return ret;

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 69213a4..3126259 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1216,6 +1216,13 @@  enum intel_sbi_destination {
 	SBI_MPHY,
 };
 
+/* SKL+ Watermark arbitrated display bandwidth Workarounds */
+enum watermark_memory_wa {
+	WATERMARK_WA_NONE,
+	WATERMARK_WA_X_TILED,
+	WATERMARK_WA_Y_TILED,
+};
+
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
@@ -1788,6 +1795,10 @@  struct skl_ddb_allocation {
 
 struct skl_wm_values {
 	unsigned dirty_pipes;
+	/* any WaterMark memory workaround Required */
+	enum watermark_memory_wa mem_wa;
+	uint32_t pipe_bw_kbps[I915_MAX_PIPES];
+	bool pipe_ytiled[I915_MAX_PIPES];
 	struct skl_ddb_allocation ddb;
 };
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5d11002..f8da488 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14574,6 +14574,28 @@  static void intel_atomic_track_fbs(struct drm_atomic_state *state)
 				  to_intel_plane(plane)->frontbuffer_bit);
 }
 
+static void
+intel_update_wm_bw_wa(struct drm_atomic_state *state)
+{
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = to_i915(state->dev);
+	const struct drm_crtc *crtc;
+	const struct drm_crtc_state *cstate;
+	struct skl_wm_values *results = &intel_state->wm_results;
+	struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
+	int i;
+
+	if (!IS_GEN9(dev_priv))
+		return;
+
+	for_each_crtc_in_state(state, crtc, cstate, i) {
+		hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i];
+		hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i];
+	}
+
+	hw_vals->mem_wa = results->mem_wa;
+}
+
 /**
  * intel_atomic_commit - commit validated state object
  * @dev: DRM device
@@ -14614,6 +14636,8 @@  static int intel_atomic_commit(struct drm_device *dev,
 	intel_shared_dpll_commit(state);
 	intel_atomic_track_fbs(state);
 
+	intel_update_wm_bw_wa(state);
+
 	if (intel_state->modeset) {
 		memcpy(dev_priv->min_pixclk, intel_state->min_pixclk,
 		       sizeof(intel_state->min_pixclk));
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index cc8fc84..fda6e1e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2878,11 +2878,7 @@  bool ilk_disable_lp_wm(struct drm_device *dev)
 
 #define SKL_SAGV_BLOCK_TIME	30 /* µs */
 
-/*
- * FIXME: We still don't have the proper code detect if we need to apply the WA,
- * so assume we'll always need it in order to avoid underruns.
- */
-static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
+static bool intel_needs_memory_bw_wa(struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 
@@ -3056,7 +3052,7 @@  bool intel_can_enable_sagv(struct drm_atomic_state *state)
 
 		latency = dev_priv->wm.skl_latency[level];
 
-		if (skl_needs_memory_bw_wa(intel_state) &&
+		if (intel_needs_memory_bw_wa(intel_state) &&
 		    plane->base.state->fb->modifier ==
 		    I915_FORMAT_MOD_X_TILED)
 			latency += 15;
@@ -3584,7 +3580,7 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	uint32_t y_min_scanlines;
 	struct intel_atomic_state *state =
 		to_intel_atomic_state(cstate->base.state);
-	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
+	enum watermark_memory_wa mem_wa;
 	bool y_tiled, x_tiled;
 
 	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) {
@@ -3600,7 +3596,8 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
 		latency += 4;
 
-	if (apply_memory_bw_wa && x_tiled)
+	mem_wa = state->wm_results.mem_wa;
+	if (mem_wa != WATERMARK_WA_NONE && x_tiled)
 		latency += 15;
 
 	width = drm_rect_width(&intel_pstate->base.src) >> 16;
@@ -3635,7 +3632,7 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		y_min_scanlines = 4;
 	}
 
-	if (apply_memory_bw_wa)
+	if (mem_wa == WATERMARK_WA_Y_TILED)
 		y_min_scanlines *= 2;
 
 	plane_bytes_per_line = width * cpp;
@@ -4063,6 +4060,15 @@  skl_compute_ddb(struct drm_atomic_state *state)
 	}
 
 	/*
+	 * If Watermark workaround is changed we need to recalculate
+	 * watermark values for all active pipes
+	 */
+	if (intel_state->wm_results.mem_wa != dev_priv->wm.skl_hw.mem_wa) {
+		realloc_pipes = ~0;
+		intel_state->wm_results.dirty_pipes = ~0;
+	}
+
+	/*
 	 * We're not recomputing for the pipes not included in the commit, so
 	 * make sure we start with the current state.
 	 */
@@ -4087,6 +4093,133 @@  skl_compute_ddb(struct drm_atomic_state *state)
 	return 0;
 }
 
+static int
+skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct memdev_info *memdev_info = &dev_priv->memdev_info;
+	struct skl_wm_values *results = &intel_state->wm_results;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *cstate;
+	int active_pipes = 0;
+	uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps;
+	int display_bw_percentage;
+	bool y_tile_enabled = false;
+	int ret, i;
+
+	if (!intel_needs_memory_bw_wa(intel_state)) {
+		results->mem_wa = WATERMARK_WA_NONE;
+		return 0;
+	}
+
+	if (!memdev_info->valid)
+		goto exit;
+
+	ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
+			state->acquire_ctx);
+	if (ret)
+		return ret;
+
+	memcpy(results->pipe_bw_kbps, dev_priv->wm.skl_hw.pipe_bw_kbps,
+			sizeof(results->pipe_bw_kbps));
+	memcpy(results->pipe_ytiled, dev_priv->wm.skl_hw.pipe_ytiled,
+			sizeof(results->pipe_ytiled));
+
+	for_each_crtc_in_state(state, crtc, cstate, i) {
+		struct drm_plane *plane;
+		const struct drm_plane_state *pstate;
+		int active_planes = 0;
+		uint32_t max_plane_bw_kbps = 0;
+
+		results->pipe_ytiled[i] = false;
+
+		if (!cstate->active) {
+			results->pipe_bw_kbps[i] = 0;
+			continue;
+		}
+
+		drm_atomic_crtc_state_for_each_plane_state(plane, pstate,
+								cstate) {
+			struct drm_framebuffer *fb;
+			uint32_t plane_bw_kbps;
+			enum plane_id id = to_intel_plane(plane)->id;
+
+			if (!pstate->visible)
+				continue;
+
+			if (WARN_ON(!pstate->fb))
+				continue;
+
+			if (id == PLANE_CURSOR)
+				continue;
+
+			fb = pstate->fb;
+			if ((fb->modifier == I915_FORMAT_MOD_Y_TILED ||
+				fb->modifier == I915_FORMAT_MOD_Yf_TILED))
+				results->pipe_ytiled[i] = true;
+
+			plane_bw_kbps = skl_adjusted_plane_pixel_rate(
+						to_intel_crtc_state(cstate),
+						to_intel_plane_state(pstate));
+			max_plane_bw_kbps = max(plane_bw_kbps,
+							max_plane_bw_kbps);
+			active_planes++;
+		}
+		results->pipe_bw_kbps[i] = max_plane_bw_kbps * active_planes;
+	}
+
+	for_each_pipe(dev_priv, i) {
+		if (results->pipe_bw_kbps[i]) {
+			max_pipe_bw_kbps = max(max_pipe_bw_kbps,
+					results->pipe_bw_kbps[i]);
+			active_pipes++;
+		}
+		if (results->pipe_ytiled[i])
+			y_tile_enabled = true;
+	}
+
+	total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes;
+	display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps * 100,
+						memdev_info->bandwidth_kbps);
+
+	/*
+	 * If there is any Ytile plane enabled and arbitrated display
+	 * bandwidth > 20% of raw system memory bandwidth
+	 * Enale Y-tile related WA
+	 *
+	 * If memory is dual channel single rank, Xtile limit = 35%, else Xtile
+	 * limit = 60%
+	 * If there is no Ytile plane enabled and
+	 * arbitrated display bandwidth > Xtile limit
+	 * Enable X-tile realated WA
+	 */
+	if (y_tile_enabled && (display_bw_percentage > 20))
+		results->mem_wa = WATERMARK_WA_Y_TILED;
+	else {
+		int x_tile_percentage = 60;
+		enum rank rank = DRAM_RANK_SINGLE;
+
+		if (memdev_info->rank != DRAM_RANK_INVALID)
+			rank = memdev_info->rank;
+
+		if ((rank == DRAM_RANK_SINGLE) &&
+					(memdev_info->num_channels == 2))
+			x_tile_percentage = 35;
+
+		if (display_bw_percentage > x_tile_percentage)
+			results->mem_wa = WATERMARK_WA_X_TILED;
+	}
+
+	return 0;
+
+exit:
+	results->mem_wa = WATERMARK_WA_Y_TILED;
+	return 0;
+}
+
+
 static void
 skl_copy_wm_for_pipe(struct skl_wm_values *dst,
 		     struct skl_wm_values *src,
@@ -4162,6 +4295,10 @@  skl_compute_wm(struct drm_atomic_state *state)
 	/* Clear all dirty flags */
 	results->dirty_pipes = 0;
 
+	ret = skl_compute_memory_bandwidth_wm_wa(state);
+	if (ret)
+		return ret;
+
 	ret = skl_compute_ddb(state);
 	if (ret)
 		return ret;