diff mbox

[v4,4/8] drm/i915/gen9: WM memory bandwidth related workaround

Message ID 20161013105826.9710-5-mahesh1.kumar@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kumar, Mahesh Oct. 13, 2016, 10:58 a.m. UTC
This patch implemnets Workariunds related to display arbitrated memory
bandwidth. These WA are applicabe for all gen-9 based platforms.

Changes since v1:
 - Rebase on top of Paulo's patch series
Changes since v2:
 - Rebase/rework after addressing Paulo's comments in previous patch

Signed-off-by: "Kumar, Mahesh" <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |   9 +++
 drivers/gpu/drm/i915/intel_drv.h |  11 +++
 drivers/gpu/drm/i915/intel_pm.c  | 146 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+)

Comments

Zanoni, Paulo R Nov. 4, 2016, 5:09 p.m. UTC | #1
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> This patch implemnets Workariunds related to display arbitrated
> memory
> bandwidth. These WA are applicabe for all gen-9 based platforms.
> 
> Changes since v1:
>  - Rebase on top of Paulo's patch series
> Changes since v2:
>  - Rebase/rework after addressing Paulo's comments in previous patch

A lot of this code has changed since then, so this will need a
significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
and we're now applying the WA by default: we just won't apply the WA
when we're pretty sure we don't need to. This helps avoiding underruns
by default.

See more below.


> 
> Signed-off-by: "Kumar, Mahesh" <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |   9 +++
>  drivers/gpu/drm/i915/intel_drv.h |  11 +++
>  drivers/gpu/drm/i915/intel_pm.c  | 146
> +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 166 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index adbd9aa..c169360 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
>  	SBI_MPHY,
>  };
>  
> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> +enum watermark_memory_wa {
> +	WATERMARK_WA_NONE,
> +	WATERMARK_WA_X_TILED,
> +	WATERMARK_WA_Y_TILED,
> +};
> +
>  #define QUIRK_PIPEA_FORCE (1<<0)
>  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {
>  
>  struct skl_wm_values {
>  	unsigned dirty_pipes;
> +	/* any WaterMark memory workaround Required */

We can remove this comment since it doesn't say anything the variable
name doesn't.

> +	enum watermark_memory_wa mem_wa;

Now that we have a proper variable in the state struct, it probably
makes sense to just kill skl_needs_memory_bw_wa() and read this
variable when we need to.


>  	struct skl_ddb_allocation ddb;
>  	uint32_t wm_linetime[I915_MAX_PIPES];
>  	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index f48e79a..2c1897b 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
> drm_atomic_state *state,
>  	return to_intel_crtc_state(crtc_state);
>  }
>  
> +static inline struct intel_crtc_state *
> +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
> +				      struct intel_crtc *crtc)
> +{
> +	struct drm_crtc_state *crtc_state;
> +
> +	crtc_state = drm_atomic_get_existing_crtc_state(state,
> &crtc->base);
> +
> +	return to_intel_crtc_state(crtc_state);

I really don't like the idea of calling to_intel_crtc_state() on a
potentially NULL pointer so the caller of this function will also check
for NULL. Even though it works today, I still think it's unsafe
practice. Please check crtc_state for NULL directly and then return
NULL.

Also, I think this function should be extracted to its own commit, and
we'd probably be able to find some callers in the existing i915 code.


> +}
> +
>  static inline struct intel_plane_state *
>  intel_atomic_get_existing_plane_state(struct drm_atomic_state
> *state,
>  				      struct intel_plane *plane)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 84ec6b1..5b8f715 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  {
>  	struct drm_plane_state *pstate = &intel_pstate->base;
>  	struct drm_framebuffer *fb = pstate->fb;
> +	struct intel_atomic_state *intel_state =
> +			to_intel_atomic_state(cstate->base.state);
>  	uint32_t latency = dev_priv->wm.skl_latency[level];
>  	uint32_t method1, method2;
>  	uint32_t plane_bytes_per_line, plane_blocks_per_line;
> @@ -3598,10 +3600,17 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  	uint32_t width = 0, height = 0;
>  	uint32_t plane_pixel_rate;
>  	uint32_t y_tile_minimum, y_min_scanlines;
> +	enum watermark_memory_wa mem_wa;
>  
>  	if (latency == 0 || !cstate->base.active || !intel_pstate-
> >base.visible)
>  		return 0;
>  
> +	mem_wa = intel_state ? intel_state->wm_results.mem_wa :
> WATERMARK_WA_NONE;
> +	if (mem_wa != WATERMARK_WA_NONE) {
> +		if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
> +			latency += 15;
> +	}
> +
>  	width = drm_rect_width(&intel_pstate->base.src) >> 16;
>  	height = drm_rect_height(&intel_pstate->base.src) >> 16;
>  
> @@ -3634,6 +3643,9 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  		y_min_scanlines = 4;
>  	}
>  
> +	if (mem_wa == WATERMARK_WA_Y_TILED)
> +		y_min_scanlines *= 2;

The changes to this function will need to be rebased. But it's
interesting that my interpretation regarding this *= 2 was different.
After an email to the spec authors it seems your interpretation is the
right one...


> +
>  	plane_bytes_per_line = width * cpp;
>  	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>  	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> @@ -4075,6 +4087,15 @@ skl_include_affected_pipes(struct
> drm_atomic_state *state)
>  		intel_state->wm_results.dirty_pipes = ~0;
>  	}
>  
> +	/*
> +	 * If Watermark workaround is changed we need to recalculate
> +	 * watermark values for all active pipes
> +	 */
> +	if (intel_state->wm_results.mem_wa != dev_priv-
> >wm.skl_hw.mem_wa) {
> +		realloc_pipes = ~0;
> +		intel_state->wm_results.dirty_pipes = ~0;
> +	}
> +

Things have changed since this patch was written. I'd recommend moving
this to skl_set_memory_bandwidth_wa().


>  	for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
>  		struct intel_crtc_state *cstate;
>  
> @@ -4087,6 +4108,129 @@ skl_include_affected_pipes(struct
> drm_atomic_state *state)
>  }
>  
>  static void
> +skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state)

We're not really setting anything here: we're computing. Rename to
skl_compute_wm_memory_bw_wa?


> +{
> +	struct drm_device *dev = state->dev;
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct intel_crtc *intel_crtc;
> +	struct intel_plane_state *intel_pstate;
> +	struct intel_atomic_state *intel_state =
> to_intel_atomic_state(state);
> +	struct memdev_info *memdev_info = &dev_priv->memdev_info;
> +	int num_active_plane, num_active_pipe;

I like to use plural in the number-of variables, since we're counting
the number of active planeS, not the number of the active plane.


> +	uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw;
> +	uint32_t total_pipe_bw;
> +	uint32_t system_bw = 0;
> +	uint32_t rank;
> +	int x_tile_per;
> +	int display_bw_per;

I read this and kept thinking "x tiles per what?", took me a while to
realize it's percentage. It's probably better to just use a long self-
documenting name here than to use an ambiguous abbreviation.

Also, I'd just go with uint32_t in the percentages too to avoid any
possible problems with the uin32_t calculations.

And perhaps some declarations could be moved to smaller inner scopes
below.


> +	bool y_tile_enabled = false;
> +

if (!platforms_that_require_the_wa) {
	wa = WATERMARK_WA_NONE;
	return;
}

> +	if (!memdev_info->valid)
> +		goto exit;

Our default behavior should be to apply the WA then in doubt, not
to avoid it, so the return value here and in the other error cases
should be WATERAMARK_WA_Y_TILED.

Also, you can avoid the gotos by just setting mem_wa at the beginning
of the function, then you can just "return" later instead of goto.


> +
> +	system_bw = memdev_info->mem_speed_khz * memdev_info-
> >num_channels *
> +				memdev_info->channel_width_bytes;
> +
> +	if (!system_bw)
> +		goto exit;

This shouldn't be possible. Otherwise, the previous patch needs to be
fixed in a way to not allow system_bw to end up as zero. Please either
remove the check or change to "if (WARN_ON(!system_bw))".

> +
> +	max_pipe_bw = 0;
> +	for_each_intel_crtc(dev, intel_crtc) {
> +		struct intel_crtc_state *cstate;
> +		struct intel_plane *plane;
> +
> +		/*
> +		 * If CRTC is part of current atomic commit, get
> crtc state from
> +		 * existing CRTC state. else take the cached CRTC
> state
> +		 */
> +		cstate = NULL;
> +		if (state)

If state is NULL we'll segfault way before this point, so there's no
need for this check.


> +			cstate =
> intel_atomic_get_existing_crtc_state(state,
> +					intel_crtc);
> +		if (!cstate)
> +			cstate = to_intel_crtc_state(intel_crtc-
> >base.state);
> +
> +		if (!cstate->base.active)
> +			continue;
> +
> +		num_active_plane = 0;
> +		max_plane_bw = 0;
> +		for_each_intel_plane_mask(dev, plane, cstate-
> >base.plane_mask) {
> +			struct drm_framebuffer *fb = NULL;
> +
> +			intel_pstate = NULL;
> +			if (state)

Same here: no need to check for state.

> +				intel_pstate =
> +				intel_atomic_get_existing_plane_stat
> e(state,
> +									
> plane);
> +			if (!intel_pstate)
> +				intel_pstate =
> +					to_intel_plane_state(plane-
> >base.state);
> +
> +			WARN_ON(!intel_pstate->base.fb);

if (WARN_ON(!intel_pstate->base.fb))
	return;

Then we can just forget about checking for fb again below.

> +
> +			if (!intel_pstate->base.visible)
> +				continue;

Don't we also need to exclude the cursor here?


> +
> +			fb = intel_pstate->base.fb;
> +			if (fb && (fb->modifier[0] ==
> I915_FORMAT_MOD_Y_TILED ||
> +				fb->modifier[0] ==
> I915_FORMAT_MOD_Yf_TILED))
> +				y_tile_enabled = true;
> +
> +			plane_bw =
> skl_adjusted_plane_pixel_rate(cstate,
> +								inte
> l_pstate);
> +			max_plane_bw = max(plane_bw, max_plane_bw);
> +			num_active_plane++;
> +		}
> +		pipe_bw = max_plane_bw * num_active_plane;
> +		max_pipe_bw = max(pipe_bw, max_pipe_bw);
> +	}
> +
> +	if (intel_state->active_pipe_changes)
> +		num_active_pipe = hweight32(intel_state-
> >active_crtcs);
> +	else
> +		num_active_pipe = hweight32(dev_priv->active_crtcs);

Why don't we just trust intel_state->active_crtcs even when
active_pipe_changes is false?


> +
> +	total_pipe_bw = max_pipe_bw * num_active_pipe;
> +
> +	display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100,
> system_bw * 1000);

Why ULL?

Also, if everything is KHz as it's supposed to be, the *100 and *1000
don't make sense.


> +
> +	/*
> +	 * If there is any Ytile plane enabled and arbitrated
> display
> +	 * bandwidth > 20% of raw system memory bandwidth
> +	 * Enale Y-tile related WA
> +	 *
> +	 * If memory is dual channel single rank, Xtile limit = 35%,
> else Xtile
> +	 * limit = 60%
> +	 * If there is no Ytile plane enabled and
> +	 * arbitrated display bandwidth > Xtile limit
> +	 * Enable X-tile realated WA
> +	 */
> +	if (y_tile_enabled && (display_bw_per > 20))
> +		intel_state->wm_results.mem_wa =
> WATERMARK_WA_Y_TILED;
> +	else {
> +
> +		if (memdev_info->rank_valid)
> +			rank = memdev_info->rank;
> +		else
> +			rank = DRAM_RANK_DUAL; /* Assume we are dual
> rank */

When in doubt, apply the most restrictive workaround to avoid the
possibility of underruns. So here it's safer to assume
DRAM_RANK_SINGLE.


> +
> +		if ((rank == DRAM_RANK_SINGLE) &&
> +					(memdev_info->num_channels
> == 2))
> +			x_tile_per = 35;
> +		else
> +			x_tile_per = 60;
> +
> +		if (display_bw_per > x_tile_per)
> +			intel_state->wm_results.mem_wa =
> WATERMARK_WA_X_TILED;
> +	}
> +	return;
> +
> +exit:
> +	intel_state->wm_results.mem_wa = WATERMARK_WA_NONE;
> +}
> +
> +static void
>  skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>  		     struct skl_wm_values *src,
>  		     enum pipe pipe)
> @@ -4131,6 +4275,8 @@ skl_compute_wm(struct drm_atomic_state *state)
>  	/* Clear all dirty flags */
>  	results->dirty_pipes = 0;
>  
> +	skl_set_memory_bandwidth_wm_wa(state);
> +
>  	ret = skl_include_affected_pipes(state);
>  	if (ret)
>  		return ret;
Ville Syrjala Nov. 4, 2016, 5:22 p.m. UTC | #2
On Fri, Nov 04, 2016 at 03:09:04PM -0200, Paulo Zanoni wrote:
> Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> > This patch implemnets Workariunds related to display arbitrated
> > memory
> > bandwidth. These WA are applicabe for all gen-9 based platforms.
> > 
> > Changes since v1:
> >  - Rebase on top of Paulo's patch series
> > Changes since v2:
> >  - Rebase/rework after addressing Paulo's comments in previous patch
> 
> A lot of this code has changed since then, so this will need a
> significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
> and we're now applying the WA by default: we just won't apply the WA
> when we're pretty sure we don't need to. This helps avoiding underruns
> by default.
> 
> See more below.
> 
> 
> > 
> > Signed-off-by: "Kumar, Mahesh" <mahesh1.kumar@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h  |   9 +++
> >  drivers/gpu/drm/i915/intel_drv.h |  11 +++
> >  drivers/gpu/drm/i915/intel_pm.c  | 146
> > +++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 166 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index adbd9aa..c169360 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
> >  	SBI_MPHY,
> >  };
> >  
> > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> > +enum watermark_memory_wa {
> > +	WATERMARK_WA_NONE,
> > +	WATERMARK_WA_X_TILED,
> > +	WATERMARK_WA_Y_TILED,
> > +};
> > +
> >  #define QUIRK_PIPEA_FORCE (1<<0)
> >  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
> >  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> > @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {
> >  
> >  struct skl_wm_values {
> >  	unsigned dirty_pipes;
> > +	/* any WaterMark memory workaround Required */
> 
> We can remove this comment since it doesn't say anything the variable
> name doesn't.
> 
> > +	enum watermark_memory_wa mem_wa;
> 
> Now that we have a proper variable in the state struct, it probably
> makes sense to just kill skl_needs_memory_bw_wa() and read this
> variable when we need to.
> 
> 
> >  	struct skl_ddb_allocation ddb;
> >  	uint32_t wm_linetime[I915_MAX_PIPES];
> >  	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > b/drivers/gpu/drm/i915/intel_drv.h
> > index f48e79a..2c1897b 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
> > drm_atomic_state *state,
> >  	return to_intel_crtc_state(crtc_state);
> >  }
> >  
> > +static inline struct intel_crtc_state *
> > +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
> > +				      struct intel_crtc *crtc)
> > +{
> > +	struct drm_crtc_state *crtc_state;
> > +
> > +	crtc_state = drm_atomic_get_existing_crtc_state(state,
> > &crtc->base);
> > +
> > +	return to_intel_crtc_state(crtc_state);
> 
> I really don't like the idea of calling to_intel_crtc_state() on a
> potentially NULL pointer so the caller of this function will also check
> for NULL. Even though it works today, I still think it's unsafe
> practice. Please check crtc_state for NULL directly and then return
> NULL.

I want to make this safe by making it a compile error if offsetof(base) != 0.
https://lists.freedesktop.org/archives/intel-gfx/2016-October/108175.html

But I think we want to go further than that patch by adding a bit more
type safety to things. I did play around with this stuff a bit more,
and I have something sitting on a branch, but I didn't quite figure out
what I want to do about const vs. non const yet.

> 
> Also, I think this function should be extracted to its own commit, and
> we'd probably be able to find some callers in the existing i915 code.

I have, on some branch again, _intel_ versions of the for_each_foo_in_state()
macros as well. I think those are going to allow a lot of ugly casting
stuff to disappear. But I think I'll hold off until Maarten's new
iterators go in before I try to send those out.
Kumar, Mahesh Nov. 10, 2016, 5:54 a.m. UTC | #3
Hi,


On Friday 04 November 2016 10:39 PM, Paulo Zanoni wrote:
> Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
>> This patch implemnets Workariunds related to display arbitrated
>> memory
>> bandwidth. These WA are applicabe for all gen-9 based platforms.
>>
>> Changes since v1:
>>   - Rebase on top of Paulo's patch series
>> Changes since v2:
>>   - Rebase/rework after addressing Paulo's comments in previous patch
> A lot of this code has changed since then, so this will need a
> significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
> and we're now applying the WA by default: we just won't apply the WA
> when we're pretty sure we don't need to. This helps avoiding underruns
> by default.
>
> See more below.
make sense, Will change default WA to WA_T_TILED
>> Signed-off-by: "Kumar, Mahesh" <mahesh1.kumar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h  |   9 +++
>>   drivers/gpu/drm/i915/intel_drv.h |  11 +++
>>   drivers/gpu/drm/i915/intel_pm.c  | 146
>> +++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 166 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index adbd9aa..c169360 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
>>   	SBI_MPHY,
>>   };
>>   
>> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
>> +enum watermark_memory_wa {
>> +	WATERMARK_WA_NONE,
>> +	WATERMARK_WA_X_TILED,
>> +	WATERMARK_WA_Y_TILED,
>> +};
>> +
>>   #define QUIRK_PIPEA_FORCE (1<<0)
>>   #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>>   #define QUIRK_INVERT_BRIGHTNESS (1<<2)
>> @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {
>>   
>>   struct skl_wm_values {
>>   	unsigned dirty_pipes;
>> +	/* any WaterMark memory workaround Required */
> We can remove this comment since it doesn't say anything the variable
> name doesn't.
>
>> +	enum watermark_memory_wa mem_wa;
> Now that we have a proper variable in the state struct, it probably
> makes sense to just kill skl_needs_memory_bw_wa() and read this
> variable when we need to.
>
>
>>   	struct skl_ddb_allocation ddb;
>>   	uint32_t wm_linetime[I915_MAX_PIPES];
>>   	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h
>> b/drivers/gpu/drm/i915/intel_drv.h
>> index f48e79a..2c1897b 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
>> drm_atomic_state *state,
>>   	return to_intel_crtc_state(crtc_state);
>>   }
>>   
>> +static inline struct intel_crtc_state *
>> +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
>> +				      struct intel_crtc *crtc)
>> +{
>> +	struct drm_crtc_state *crtc_state;
>> +
>> +	crtc_state = drm_atomic_get_existing_crtc_state(state,
>> &crtc->base);
>> +
>> +	return to_intel_crtc_state(crtc_state);
> I really don't like the idea of calling to_intel_crtc_state() on a
> potentially NULL pointer so the caller of this function will also check
> for NULL. Even though it works today, I still think it's unsafe
> practice. Please check crtc_state for NULL directly and then return
> NULL.
>
> Also, I think this function should be extracted to its own commit, and
> we'd probably be able to find some callers in the existing i915 code.
Will extract this function & include NULL check for crtc_state itself
>
>> +}
>> +
>>   static inline struct intel_plane_state *
>>   intel_atomic_get_existing_plane_state(struct drm_atomic_state
>> *state,
>>   				      struct intel_plane *plane)
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>> b/drivers/gpu/drm/i915/intel_pm.c
>> index 84ec6b1..5b8f715 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   {
>>   	struct drm_plane_state *pstate = &intel_pstate->base;
>>   	struct drm_framebuffer *fb = pstate->fb;
>> +	struct intel_atomic_state *intel_state =
>> +			to_intel_atomic_state(cstate->base.state);
>>   	uint32_t latency = dev_priv->wm.skl_latency[level];
>>   	uint32_t method1, method2;
>>   	uint32_t plane_bytes_per_line, plane_blocks_per_line;
>> @@ -3598,10 +3600,17 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   	uint32_t width = 0, height = 0;
>>   	uint32_t plane_pixel_rate;
>>   	uint32_t y_tile_minimum, y_min_scanlines;
>> +	enum watermark_memory_wa mem_wa;
>>   
>>   	if (latency == 0 || !cstate->base.active || !intel_pstate-
>>> base.visible)
>>   		return 0;
>>   
>> +	mem_wa = intel_state ? intel_state->wm_results.mem_wa :
>> WATERMARK_WA_NONE;
>> +	if (mem_wa != WATERMARK_WA_NONE) {
>> +		if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
>> +			latency += 15;
>> +	}
>> +
>>   	width = drm_rect_width(&intel_pstate->base.src) >> 16;
>>   	height = drm_rect_height(&intel_pstate->base.src) >> 16;
>>   
>> @@ -3634,6 +3643,9 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   		y_min_scanlines = 4;
>>   	}
>>   
>> +	if (mem_wa == WATERMARK_WA_Y_TILED)
>> +		y_min_scanlines *= 2;
> The changes to this function will need to be rebased. But it's
> interesting that my interpretation regarding this *= 2 was different.
> After an email to the spec authors it seems your interpretation is the
> right one...
>
>
>> +
>>   	plane_bytes_per_line = width * cpp;
>>   	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>>   	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
>> @@ -4075,6 +4087,15 @@ skl_include_affected_pipes(struct
>> drm_atomic_state *state)
>>   		intel_state->wm_results.dirty_pipes = ~0;
>>   	}
>>   
>> +	/*
>> +	 * If Watermark workaround is changed we need to recalculate
>> +	 * watermark values for all active pipes
>> +	 */
>> +	if (intel_state->wm_results.mem_wa != dev_priv-
>>> wm.skl_hw.mem_wa) {
>> +		realloc_pipes = ~0;
>> +		intel_state->wm_results.dirty_pipes = ~0;
>> +	}
>> +
> Things have changed since this patch was written. I'd recommend moving
> this to skl_set_memory_bandwidth_wa().
Working on re-basing the patch.
>
>>   	for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
>>   		struct intel_crtc_state *cstate;
>>   
>> @@ -4087,6 +4108,129 @@ skl_include_affected_pipes(struct
>> drm_atomic_state *state)
>>   }
>>   
>>   static void
>> +skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
> We're not really setting anything here: we're computing. Rename to
> skl_compute_wm_memory_bw_wa?
>
>
>> +{
>> +	struct drm_device *dev = state->dev;
>> +	struct drm_i915_private *dev_priv = to_i915(dev);
>> +	struct intel_crtc *intel_crtc;
>> +	struct intel_plane_state *intel_pstate;
>> +	struct intel_atomic_state *intel_state =
>> to_intel_atomic_state(state);
>> +	struct memdev_info *memdev_info = &dev_priv->memdev_info;
>> +	int num_active_plane, num_active_pipe;
> I like to use plural in the number-of variables, since we're counting
> the number of active planeS, not the number of the active plane.
>
>
>> +	uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw;
>> +	uint32_t total_pipe_bw;
>> +	uint32_t system_bw = 0;
>> +	uint32_t rank;
>> +	int x_tile_per;
>> +	int display_bw_per;
> I read this and kept thinking "x tiles per what?", took me a while to
> realize it's percentage. It's probably better to just use a long self-
> documenting name here than to use an ambiguous abbreviation.
>
> Also, I'd just go with uint32_t in the percentages too to avoid any
> possible problems with the uin32_t calculations.
>
> And perhaps some declarations could be moved to smaller inner scopes
> below.
>
>
>> +	bool y_tile_enabled = false;
>> +
> if (!platforms_that_require_the_wa) {
> 	wa = WATERMARK_WA_NONE;
> 	return;
> }
this function is not called by any GEN other than GEN9, will it be ok to 
add "if (!GEN9)" check?
>> +	if (!memdev_info->valid)
>> +		goto exit;
> Our default behavior should be to apply the WA then in doubt, not
> to avoid it, so the return value here and in the other error cases
> should be WATERAMARK_WA_Y_TILED.
>
> Also, you can avoid the gotos by just setting mem_wa at the beginning
> of the function, then you can just "return" later instead of goto.
>
>
>> +
>> +	system_bw = memdev_info->mem_speed_khz * memdev_info-
>>> num_channels *
>> +				memdev_info->channel_width_bytes;
>> +
>> +	if (!system_bw)
>> +		goto exit;
> This shouldn't be possible. Otherwise, the previous patch needs to be
> fixed in a way to not allow system_bw to end up as zero. Please either
> remove the check or change to "if (WARN_ON(!system_bw))".
yes, ideally this should not be possible, but what if because of any 
reason BIOS is not writing the register OR we don't have BIOS altogether?

If we add WARN_ON here, it'll flood the logs, we can add WARN_ON_ONCE, 
but we are anyway printing the warning in previous patch, if frequency 
or any other variable calculated is ZERO, IMO will add more prints in 
previous patch instead of adding here.
>
>> +
>> +	max_pipe_bw = 0;
>> +	for_each_intel_crtc(dev, intel_crtc) {
>> +		struct intel_crtc_state *cstate;
>> +		struct intel_plane *plane;
>> +
>> +		/*
>> +		 * If CRTC is part of current atomic commit, get
>> crtc state from
>> +		 * existing CRTC state. else take the cached CRTC
>> state
>> +		 */
>> +		cstate = NULL;
>> +		if (state)
> If state is NULL we'll segfault way before this point, so there's no
> need for this check.
>
>
>> +			cstate =
>> intel_atomic_get_existing_crtc_state(state,
>> +					intel_crtc);
>> +		if (!cstate)
>> +			cstate = to_intel_crtc_state(intel_crtc-
>>> base.state);
>> +
>> +		if (!cstate->base.active)
>> +			continue;
>> +
>> +		num_active_plane = 0;
>> +		max_plane_bw = 0;
>> +		for_each_intel_plane_mask(dev, plane, cstate-
>>> base.plane_mask) {
>> +			struct drm_framebuffer *fb = NULL;
>> +
>> +			intel_pstate = NULL;
>> +			if (state)
> Same here: no need to check for state.
>
>> +				intel_pstate =
>> +				intel_atomic_get_existing_plane_stat
>> e(state,
>> +									
>> plane);
>> +			if (!intel_pstate)
>> +				intel_pstate =
>> +					to_intel_plane_state(plane-
>>> base.state);
>> +
>> +			WARN_ON(!intel_pstate->base.fb);
> if (WARN_ON(!intel_pstate->base.fb))
> 	return;
>
> Then we can just forget about checking for fb again below.
>
>> +
>> +			if (!intel_pstate->base.visible)
>> +				continue;
> Don't we also need to exclude the cursor here?
yes, cursor should be excluded, will add that. thanks,
>
>
>> +
>> +			fb = intel_pstate->base.fb;
>> +			if (fb && (fb->modifier[0] ==
>> I915_FORMAT_MOD_Y_TILED ||
>> +				fb->modifier[0] ==
>> I915_FORMAT_MOD_Yf_TILED))
>> +				y_tile_enabled = true;
>> +
>> +			plane_bw =
>> skl_adjusted_plane_pixel_rate(cstate,
>> +								inte
>> l_pstate);
>> +			max_plane_bw = max(plane_bw, max_plane_bw);
>> +			num_active_plane++;
>> +		}
>> +		pipe_bw = max_plane_bw * num_active_plane;
>> +		max_pipe_bw = max(pipe_bw, max_pipe_bw);
>> +	}
>> +
>> +	if (intel_state->active_pipe_changes)
>> +		num_active_pipe = hweight32(intel_state-
>>> active_crtcs);
>> +	else
>> +		num_active_pipe = hweight32(dev_priv->active_crtcs);
> Why don't we just trust intel_state->active_crtcs even when
> active_pipe_changes is false?
I really didn't dig into that code & reused the code from different 
function. will check into this
>
>
>> +
>> +	total_pipe_bw = max_pipe_bw * num_active_pipe;
>> +
>> +	display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100,
>> system_bw * 1000);
> Why ULL?
>
> Also, if everything is KHz as it's supposed to be, the *100 and *1000
> don't make sense.
In previous patch I converted the frequency to MHz... need to fix that, 
then *1000 not be required,
*100 is still required to get the percentage.
>
>
>> +
>> +	/*
>> +	 * If there is any Ytile plane enabled and arbitrated
>> display
>> +	 * bandwidth > 20% of raw system memory bandwidth
>> +	 * Enale Y-tile related WA
>> +	 *
>> +	 * If memory is dual channel single rank, Xtile limit = 35%,
>> else Xtile
>> +	 * limit = 60%
>> +	 * If there is no Ytile plane enabled and
>> +	 * arbitrated display bandwidth > Xtile limit
>> +	 * Enable X-tile realated WA
>> +	 */
>> +	if (y_tile_enabled && (display_bw_per > 20))
>> +		intel_state->wm_results.mem_wa =
>> WATERMARK_WA_Y_TILED;
>> +	else {
>> +
>> +		if (memdev_info->rank_valid)
>> +			rank = memdev_info->rank;
>> +		else
>> +			rank = DRAM_RANK_DUAL; /* Assume we are dual
>> rank */
> When in doubt, apply the most restrictive workaround to avoid the
> possibility of underruns. So here it's safer to assume
> DRAM_RANK_SINGLE.
OK...

thanks,

Regards,
-Mahesh
>
>> +
>> +		if ((rank == DRAM_RANK_SINGLE) &&
>> +					(memdev_info->num_channels
>> == 2))
>> +			x_tile_per = 35;
>> +		else
>> +			x_tile_per = 60;
>> +
>> +		if (display_bw_per > x_tile_per)
>> +			intel_state->wm_results.mem_wa =
>> WATERMARK_WA_X_TILED;
>> +	}
>> +	return;
>> +
>> +exit:
>> +	intel_state->wm_results.mem_wa = WATERMARK_WA_NONE;
>> +}
>> +
>> +static void
>>   skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>>   		     struct skl_wm_values *src,
>>   		     enum pipe pipe)
>> @@ -4131,6 +4275,8 @@ skl_compute_wm(struct drm_atomic_state *state)
>>   	/* Clear all dirty flags */
>>   	results->dirty_pipes = 0;
>>   
>> +	skl_set_memory_bandwidth_wm_wa(state);
>> +
>>   	ret = skl_include_affected_pipes(state);
>>   	if (ret)
>>   		return ret;
Zanoni, Paulo R Nov. 16, 2016, 12:36 p.m. UTC | #4
Em Qui, 2016-11-10 às 11:24 +0530, Mahesh Kumar escreveu:
> Hi,
> 
> 
> 
(removed a bunch of stuff here)

> > 
> > > 
> > 
> > 
> > > 
> > > +	bool y_tile_enabled = false;
> > > +
> > if (!platforms_that_require_the_wa) {
> > 	wa = WATERMARK_WA_NONE;
> > 	return;
> > }
> this function is not called by any GEN other than GEN9, will it be ok
> to 
> add "if (!GEN9)" check?

Well, there's Gemnilake support floating around the mailing list now...

> > 
> > > 
> > > +	if (!memdev_info->valid)
> > > +		goto exit;
> > Our default behavior should be to apply the WA then in doubt, not
> > to avoid it, so the return value here and in the other error cases
> > should be WATERAMARK_WA_Y_TILED.
> > 
> > Also, you can avoid the gotos by just setting mem_wa at the
> > beginning
> > of the function, then you can just "return" later instead of goto.
> > 
> > 
> > > 
> > > +
> > > +	system_bw = memdev_info->mem_speed_khz * memdev_info-
> > > > 
> > > > num_channels *
> > > +				memdev_info-
> > > >channel_width_bytes;
> > > +
> > > +	if (!system_bw)
> > > +		goto exit;
> > This shouldn't be possible. Otherwise, the previous patch needs to
> > be
> > fixed in a way to not allow system_bw to end up as zero. Please
> > either
> > remove the check or change to "if (WARN_ON(!system_bw))".
> yes, ideally this should not be possible, but what if because of any 
> reason BIOS is not writing the register OR we don't have BIOS
> altogether?

Then the previous patch should be able to deal with it, and leave us
with memdev_info->valid=false.


> 
> If we add WARN_ON here, it'll flood the logs, we can add
> WARN_ON_ONCE, 
> but we are anyway printing the warning in previous patch, if
> frequency 
> or any other variable calculated is ZERO, IMO will add more prints
> in 
> previous patch instead of adding here.

Usually we add WARNs when we expect them to never ever be triggered. So
instead of making it WARN_ON_ONCE, we should make sure the condition is
never met: if the memory info is bad, set memdev_info->valid to false.


Thanks,
Paulo
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index adbd9aa..c169360 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1092,6 +1092,13 @@  enum intel_sbi_destination {
 	SBI_MPHY,
 };
 
+/* SKL+ Watermark arbitrated display bandwidth Workarounds */
+enum watermark_memory_wa {
+	WATERMARK_WA_NONE,
+	WATERMARK_WA_X_TILED,
+	WATERMARK_WA_Y_TILED,
+};
+
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
@@ -1644,6 +1651,8 @@  struct skl_ddb_allocation {
 
 struct skl_wm_values {
 	unsigned dirty_pipes;
+	/* any WaterMark memory workaround Required */
+	enum watermark_memory_wa mem_wa;
 	struct skl_ddb_allocation ddb;
 	uint32_t wm_linetime[I915_MAX_PIPES];
 	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index f48e79a..2c1897b 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1813,6 +1813,17 @@  intel_atomic_get_crtc_state(struct drm_atomic_state *state,
 	return to_intel_crtc_state(crtc_state);
 }
 
+static inline struct intel_crtc_state *
+intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
+				      struct intel_crtc *crtc)
+{
+	struct drm_crtc_state *crtc_state;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base);
+
+	return to_intel_crtc_state(crtc_state);
+}
+
 static inline struct intel_plane_state *
 intel_atomic_get_existing_plane_state(struct drm_atomic_state *state,
 				      struct intel_plane *plane)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 84ec6b1..5b8f715 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3589,6 +3589,8 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 {
 	struct drm_plane_state *pstate = &intel_pstate->base;
 	struct drm_framebuffer *fb = pstate->fb;
+	struct intel_atomic_state *intel_state =
+			to_intel_atomic_state(cstate->base.state);
 	uint32_t latency = dev_priv->wm.skl_latency[level];
 	uint32_t method1, method2;
 	uint32_t plane_bytes_per_line, plane_blocks_per_line;
@@ -3598,10 +3600,17 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	uint32_t width = 0, height = 0;
 	uint32_t plane_pixel_rate;
 	uint32_t y_tile_minimum, y_min_scanlines;
+	enum watermark_memory_wa mem_wa;
 
 	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible)
 		return 0;
 
+	mem_wa = intel_state ? intel_state->wm_results.mem_wa : WATERMARK_WA_NONE;
+	if (mem_wa != WATERMARK_WA_NONE) {
+		if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
+			latency += 15;
+	}
+
 	width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	height = drm_rect_height(&intel_pstate->base.src) >> 16;
 
@@ -3634,6 +3643,9 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		y_min_scanlines = 4;
 	}
 
+	if (mem_wa == WATERMARK_WA_Y_TILED)
+		y_min_scanlines *= 2;
+
 	plane_bytes_per_line = width * cpp;
 	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
 	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
@@ -4075,6 +4087,15 @@  skl_include_affected_pipes(struct drm_atomic_state *state)
 		intel_state->wm_results.dirty_pipes = ~0;
 	}
 
+	/*
+	 * If Watermark workaround is changed we need to recalculate
+	 * watermark values for all active pipes
+	 */
+	if (intel_state->wm_results.mem_wa != dev_priv->wm.skl_hw.mem_wa) {
+		realloc_pipes = ~0;
+		intel_state->wm_results.dirty_pipes = ~0;
+	}
+
 	for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
 		struct intel_crtc_state *cstate;
 
@@ -4087,6 +4108,129 @@  skl_include_affected_pipes(struct drm_atomic_state *state)
 }
 
 static void
+skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_crtc *intel_crtc;
+	struct intel_plane_state *intel_pstate;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct memdev_info *memdev_info = &dev_priv->memdev_info;
+	int num_active_plane, num_active_pipe;
+	uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw;
+	uint32_t total_pipe_bw;
+	uint32_t system_bw = 0;
+	uint32_t rank;
+	int x_tile_per;
+	int display_bw_per;
+	bool y_tile_enabled = false;
+
+	if (!memdev_info->valid)
+		goto exit;
+
+	system_bw = memdev_info->mem_speed_khz * memdev_info->num_channels *
+				memdev_info->channel_width_bytes;
+
+	if (!system_bw)
+		goto exit;
+
+	max_pipe_bw = 0;
+	for_each_intel_crtc(dev, intel_crtc) {
+		struct intel_crtc_state *cstate;
+		struct intel_plane *plane;
+
+		/*
+		 * If CRTC is part of current atomic commit, get crtc state from
+		 * existing CRTC state. else take the cached CRTC state
+		 */
+		cstate = NULL;
+		if (state)
+			cstate = intel_atomic_get_existing_crtc_state(state,
+					intel_crtc);
+		if (!cstate)
+			cstate = to_intel_crtc_state(intel_crtc->base.state);
+
+		if (!cstate->base.active)
+			continue;
+
+		num_active_plane = 0;
+		max_plane_bw = 0;
+		for_each_intel_plane_mask(dev, plane, cstate->base.plane_mask) {
+			struct drm_framebuffer *fb = NULL;
+
+			intel_pstate = NULL;
+			if (state)
+				intel_pstate =
+				intel_atomic_get_existing_plane_state(state,
+									plane);
+			if (!intel_pstate)
+				intel_pstate =
+					to_intel_plane_state(plane->base.state);
+
+			WARN_ON(!intel_pstate->base.fb);
+
+			if (!intel_pstate->base.visible)
+				continue;
+
+			fb = intel_pstate->base.fb;
+			if (fb && (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+				fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED))
+				y_tile_enabled = true;
+
+			plane_bw = skl_adjusted_plane_pixel_rate(cstate,
+								intel_pstate);
+			max_plane_bw = max(plane_bw, max_plane_bw);
+			num_active_plane++;
+		}
+		pipe_bw = max_plane_bw * num_active_plane;
+		max_pipe_bw = max(pipe_bw, max_pipe_bw);
+	}
+
+	if (intel_state->active_pipe_changes)
+		num_active_pipe = hweight32(intel_state->active_crtcs);
+	else
+		num_active_pipe = hweight32(dev_priv->active_crtcs);
+
+	total_pipe_bw = max_pipe_bw * num_active_pipe;
+
+	display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100, system_bw * 1000);
+
+	/*
+	 * If there is any Ytile plane enabled and arbitrated display
+	 * bandwidth > 20% of raw system memory bandwidth
+	 * Enale Y-tile related WA
+	 *
+	 * If memory is dual channel single rank, Xtile limit = 35%, else Xtile
+	 * limit = 60%
+	 * If there is no Ytile plane enabled and
+	 * arbitrated display bandwidth > Xtile limit
+	 * Enable X-tile realated WA
+	 */
+	if (y_tile_enabled && (display_bw_per > 20))
+		intel_state->wm_results.mem_wa = WATERMARK_WA_Y_TILED;
+	else {
+
+		if (memdev_info->rank_valid)
+			rank = memdev_info->rank;
+		else
+			rank = DRAM_RANK_DUAL; /* Assume we are dual rank */
+
+		if ((rank == DRAM_RANK_SINGLE) &&
+					(memdev_info->num_channels == 2))
+			x_tile_per = 35;
+		else
+			x_tile_per = 60;
+
+		if (display_bw_per > x_tile_per)
+			intel_state->wm_results.mem_wa = WATERMARK_WA_X_TILED;
+	}
+	return;
+
+exit:
+	intel_state->wm_results.mem_wa = WATERMARK_WA_NONE;
+}
+
+static void
 skl_copy_wm_for_pipe(struct skl_wm_values *dst,
 		     struct skl_wm_values *src,
 		     enum pipe pipe)
@@ -4131,6 +4275,8 @@  skl_compute_wm(struct drm_atomic_state *state)
 	/* Clear all dirty flags */
 	results->dirty_pipes = 0;
 
+	skl_set_memory_bandwidth_wm_wa(state);
+
 	ret = skl_include_affected_pipes(state);
 	if (ret)
 		return ret;