diff mbox

[v3,5/9] drm/i915/gen9: WM memory bandwidth related workaround

Message ID 20160909080106.17506-6-mahesh1.kumar@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kumar, Mahesh Sept. 9, 2016, 8:01 a.m. UTC
From: Mahesh Kumar <mahesh1.kumar@intel.com>

This patch implements workarounds related to display arbitrated memory
bandwidth. These WAs are applicable to all gen-9 based platforms.

Changes since v1:
 - Rebase on top of Paulo's patch series

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |   9 +++
 drivers/gpu/drm/i915/intel_drv.h |  11 +++
 drivers/gpu/drm/i915/intel_pm.c  | 145 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 165 insertions(+)

Comments

Maarten Lankhorst Sept. 12, 2016, 11:02 a.m. UTC | #1
Op 09-09-16 om 10:01 schreef Kumar, Mahesh:
> From: Mahesh Kumar <mahesh1.kumar@intel.com>
>
> This patch implemnets Workarounds related to display arbitrated memory
> bandwidth. These WA are applicabe for all gen-9 based platforms.
>
> Changes since v1:
>  - Rebase on top of Paulo's patch series
>
> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |   9 +++
>  drivers/gpu/drm/i915/intel_drv.h |  11 +++
>  drivers/gpu/drm/i915/intel_pm.c  | 145 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 165 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4313992..4737a0e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1074,6 +1074,13 @@ enum intel_sbi_destination {
>  	SBI_MPHY,
>  };
>  
> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> +enum watermark_memory_wa {
> +	WATERMARK_WA_NONE,
> +	WATERMARK_WA_X_TILED,
> +	WATERMARK_WA_Y_TILED,
> +};
> +
>  #define QUIRK_PIPEA_FORCE (1<<0)
>  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> @@ -1623,6 +1630,8 @@ struct skl_ddb_allocation {
>  
>  struct skl_wm_values {
>  	unsigned dirty_pipes;
> +	/* any WaterMark memory workaround Required */
> +	enum watermark_memory_wa mem_wa;
>  	struct skl_ddb_allocation ddb;
>  	uint32_t wm_linetime[I915_MAX_PIPES];
>  	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 6cd7e8a..66cb46c 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1800,6 +1800,17 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state,
>  	return to_intel_crtc_state(crtc_state);
>  }
>  
> +static inline struct intel_crtc_state *
> +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
> +				      struct intel_crtc *crtc)
> +{
> +	struct drm_crtc_state *crtc_state;
> +
> +	crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base);
> +
> +	return to_intel_crtc_state(crtc_state);
> +}
> +
>  static inline struct intel_plane_state *
>  intel_atomic_get_existing_plane_state(struct drm_atomic_state *state,
>  				      struct intel_plane *plane)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 7c70e07..0ec328b 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  {
>  	struct drm_plane_state *pstate = &intel_pstate->base;
>  	struct drm_framebuffer *fb = pstate->fb;
> +	struct intel_atomic_state *intel_state =
> +			to_intel_atomic_state(cstate->base.state);
>  	uint32_t latency = dev_priv->wm.skl_latency[level];
>  	uint32_t method1, method2;
>  	uint32_t plane_bytes_per_line, plane_blocks_per_line;
> @@ -3602,10 +3604,17 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  	struct skl_wm_level *result = &pipe_wm->wm[level];
>  	uint16_t *out_blocks = &result->plane_res_b[id];
>  	uint8_t *out_lines = &result->plane_res_l[id];
> +	enum watermark_memory_wa mem_wa;
>  
>  	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible)
>  		return 0;
>  
> +	mem_wa = intel_state ? intel_state->wm_results.mem_wa : WATERMARK_WA_NONE;
> +	if (mem_wa != WATERMARK_WA_NONE) {
> +		if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
> +			latency += 15;
> +	}
> +
>  	width = drm_rect_width(&intel_pstate->base.src) >> 16;
>  	height = drm_rect_height(&intel_pstate->base.src) >> 16;
>  
> @@ -3637,6 +3646,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>  		y_min_scanlines = 4;
>  	}
>  
> +	if (mem_wa == WATERMARK_WA_Y_TILED)
> +		y_min_scanlines *= 2;
> +
>  	plane_bytes_per_line = width * cpp;
>  	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>  	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
I don't have y_min_scanlines in nightly? What is this series based on?
It doesn't apply cleanly at least..

~Maarten
Maarten Lankhorst Sept. 12, 2016, 11:12 a.m. UTC | #2
Op 12-09-16 om 13:02 schreef Maarten Lankhorst:
> Op 09-09-16 om 10:01 schreef Kumar, Mahesh:
>> From: Mahesh Kumar <mahesh1.kumar@intel.com>
>>
>> This patch implemnets Workarounds related to display arbitrated memory
>> bandwidth. These WA are applicabe for all gen-9 based platforms.
>>
>> Changes since v1:
>>  - Rebase on top of Paulo's patch series
>>
>> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h  |   9 +++
>>  drivers/gpu/drm/i915/intel_drv.h |  11 +++
>>  drivers/gpu/drm/i915/intel_pm.c  | 145 +++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 165 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 4313992..4737a0e 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1074,6 +1074,13 @@ enum intel_sbi_destination {
>>  	SBI_MPHY,
>>  };
>>  
>> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
>> +enum watermark_memory_wa {
>> +	WATERMARK_WA_NONE,
>> +	WATERMARK_WA_X_TILED,
>> +	WATERMARK_WA_Y_TILED,
>> +};
>> +
>>  #define QUIRK_PIPEA_FORCE (1<<0)
>>  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>>  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
>> @@ -1623,6 +1630,8 @@ struct skl_ddb_allocation {
>>  
>>  struct skl_wm_values {
>>  	unsigned dirty_pipes;
>> +	/* any WaterMark memory workaround Required */
>> +	enum watermark_memory_wa mem_wa;
>>  	struct skl_ddb_allocation ddb;
>>  	uint32_t wm_linetime[I915_MAX_PIPES];
>>  	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index 6cd7e8a..66cb46c 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1800,6 +1800,17 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state,
>>  	return to_intel_crtc_state(crtc_state);
>>  }
>>  
>> +static inline struct intel_crtc_state *
>> +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
>> +				      struct intel_crtc *crtc)
>> +{
>> +	struct drm_crtc_state *crtc_state;
>> +
>> +	crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base);
>> +
>> +	return to_intel_crtc_state(crtc_state);
>> +}
>> +
>>  static inline struct intel_plane_state *
>>  intel_atomic_get_existing_plane_state(struct drm_atomic_state *state,
>>  				      struct intel_plane *plane)
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 7c70e07..0ec328b 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>>  {
>>  	struct drm_plane_state *pstate = &intel_pstate->base;
>>  	struct drm_framebuffer *fb = pstate->fb;
>> +	struct intel_atomic_state *intel_state =
>> +			to_intel_atomic_state(cstate->base.state);
>>  	uint32_t latency = dev_priv->wm.skl_latency[level];
>>  	uint32_t method1, method2;
>>  	uint32_t plane_bytes_per_line, plane_blocks_per_line;
>> @@ -3602,10 +3604,17 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>>  	struct skl_wm_level *result = &pipe_wm->wm[level];
>>  	uint16_t *out_blocks = &result->plane_res_b[id];
>>  	uint8_t *out_lines = &result->plane_res_l[id];
>> +	enum watermark_memory_wa mem_wa;
>>  
>>  	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible)
>>  		return 0;
>>  
>> +	mem_wa = intel_state ? intel_state->wm_results.mem_wa : WATERMARK_WA_NONE;
>> +	if (mem_wa != WATERMARK_WA_NONE) {
>> +		if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
>> +			latency += 15;
>> +	}
>> +
>>  	width = drm_rect_width(&intel_pstate->base.src) >> 16;
>>  	height = drm_rect_height(&intel_pstate->base.src) >> 16;
>>  
>> @@ -3637,6 +3646,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>>  		y_min_scanlines = 4;
>>  	}
>>  
>> +	if (mem_wa == WATERMARK_WA_Y_TILED)
>> +		y_min_scanlines *= 2;
>> +
>>  	plane_bytes_per_line = width * cpp;
>>  	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>>  	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> I don't have y_min_scanlines in nightly? What is this series based on?
> It doesn't apply cleanly at least..
Ah nevermind, applies on top of Paulo's series.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4313992..4737a0e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1074,6 +1074,13 @@  enum intel_sbi_destination {
 	SBI_MPHY,
 };
 
+/* SKL+ Watermark arbitrated display bandwidth Workarounds */
+enum watermark_memory_wa {
+	WATERMARK_WA_NONE,
+	WATERMARK_WA_X_TILED,
+	WATERMARK_WA_Y_TILED,
+};
+
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
@@ -1623,6 +1630,8 @@  struct skl_ddb_allocation {
 
 struct skl_wm_values {
 	unsigned dirty_pipes;
+	/* any WaterMark memory workaround Required */
+	enum watermark_memory_wa mem_wa;
 	struct skl_ddb_allocation ddb;
 	uint32_t wm_linetime[I915_MAX_PIPES];
 	uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 6cd7e8a..66cb46c 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1800,6 +1800,17 @@  intel_atomic_get_crtc_state(struct drm_atomic_state *state,
 	return to_intel_crtc_state(crtc_state);
 }
 
+static inline struct intel_crtc_state *
+intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
+				      struct intel_crtc *crtc)
+{
+	struct drm_crtc_state *crtc_state;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base);
+
+	return to_intel_crtc_state(crtc_state);
+}
+
 static inline struct intel_plane_state *
 intel_atomic_get_existing_plane_state(struct drm_atomic_state *state,
 				      struct intel_plane *plane)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 7c70e07..0ec328b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3589,6 +3589,8 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 {
 	struct drm_plane_state *pstate = &intel_pstate->base;
 	struct drm_framebuffer *fb = pstate->fb;
+	struct intel_atomic_state *intel_state =
+			to_intel_atomic_state(cstate->base.state);
 	uint32_t latency = dev_priv->wm.skl_latency[level];
 	uint32_t method1, method2;
 	uint32_t plane_bytes_per_line, plane_blocks_per_line;
@@ -3602,10 +3604,17 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	struct skl_wm_level *result = &pipe_wm->wm[level];
 	uint16_t *out_blocks = &result->plane_res_b[id];
 	uint8_t *out_lines = &result->plane_res_l[id];
+	enum watermark_memory_wa mem_wa;
 
 	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible)
 		return 0;
 
+	mem_wa = intel_state ? intel_state->wm_results.mem_wa : WATERMARK_WA_NONE;
+	if (mem_wa != WATERMARK_WA_NONE) {
+		if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
+			latency += 15;
+	}
+
 	width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	height = drm_rect_height(&intel_pstate->base.src) >> 16;
 
@@ -3637,6 +3646,9 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		y_min_scanlines = 4;
 	}
 
+	if (mem_wa == WATERMARK_WA_Y_TILED)
+		y_min_scanlines *= 2;
+
 	plane_bytes_per_line = width * cpp;
 	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
 	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
@@ -4041,6 +4053,15 @@  skl_include_affected_pipes(struct drm_atomic_state *state)
 		intel_state->wm_results.dirty_pipes = ~0;
 	}
 
+	/*
+	 * If Watermark workaround is changed we need to recalculate
+	 * watermark values for all active pipes
+	 */
+	if (intel_state->wm_results.mem_wa != dev_priv->wm.skl_hw.mem_wa) {
+		realloc_pipes = ~0;
+		intel_state->wm_results.dirty_pipes = ~0;
+	}
+
 	for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
 		struct intel_crtc_state *cstate;
 
@@ -4057,6 +4078,128 @@  skl_include_affected_pipes(struct drm_atomic_state *state)
 }
 
 static void
+skl_set_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_crtc *intel_crtc;
+	struct intel_plane_state *intel_pstate;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	int num_active_plane, num_active_pipe;
+	uint32_t plane_bw, max_plane_bw, pipe_bw, max_pipe_bw;
+	uint32_t total_pipe_bw;
+	uint32_t system_bw = 0;
+	uint8_t num_channel, data_width, rank;
+	int x_tile_per;
+	int display_bw_per;
+	bool y_tile_enabled = false;
+
+	if (!dev_priv->memdev_info.valid)
+		goto exit;
+
+	num_channel = dev_priv->memdev_info.num_channel;
+	data_width = dev_priv->memdev_info.data_width;
+	system_bw = dev_priv->memdev_info.mem_speed * num_channel * data_width;
+
+	if (!system_bw)
+		goto exit;
+
+	max_pipe_bw = 0;
+	for_each_intel_crtc(dev, intel_crtc) {
+		struct intel_crtc_state *cstate;
+		struct intel_plane *plane;
+
+		/*
+		 * If CRTC is part of current atomic commit, get crtc state from
+		 * existing CRTC state. else take the cached CRTC state
+		 */
+		cstate = NULL;
+		if (state)
+			cstate = intel_atomic_get_existing_crtc_state(state,
+					intel_crtc);
+		if (!cstate)
+			cstate = to_intel_crtc_state(intel_crtc->base.state);
+
+		if (!cstate->base.active)
+			continue;
+
+		num_active_plane = 0;
+		max_plane_bw = 0;
+		for_each_intel_plane_mask(dev, plane, cstate->base.plane_mask) {
+			struct drm_framebuffer *fb = NULL;
+
+			intel_pstate = NULL;
+			if (state)
+				intel_pstate =
+				intel_atomic_get_existing_plane_state(state,
+									plane);
+			if (!intel_pstate)
+				intel_pstate =
+					to_intel_plane_state(plane->base.state);
+
+			WARN_ON(!intel_pstate->base.fb);
+
+			if (!intel_pstate->base.visible)
+				continue;
+
+			fb = intel_pstate->base.fb;
+			if (fb && (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+				fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED))
+				y_tile_enabled = true;
+
+			plane_bw = skl_adjusted_plane_pixel_rate(cstate,
+								intel_pstate);
+			max_plane_bw = max(plane_bw, max_plane_bw);
+			num_active_plane++;
+		}
+		pipe_bw = max_plane_bw * num_active_plane;
+		max_pipe_bw = max(pipe_bw, max_pipe_bw);
+	}
+
+	if (intel_state->active_pipe_changes)
+		num_active_pipe = hweight32(intel_state->active_crtcs);
+	else
+		num_active_pipe = hweight32(dev_priv->active_crtcs);
+
+	total_pipe_bw = max_pipe_bw * num_active_pipe;
+
+	display_bw_per = DIV_ROUND_UP_ULL(total_pipe_bw * 100, system_bw * 1000);
+
+	/*
+	 * If there is any Ytile plane enabled and arbitrated display
+	 * bandwidth > 20% of raw system memory bandwidth
+	 * Enable Y-tile related WA
+	 *
+	 * If memory is dual channel single rank, Xtile limit = 35%, else Xtile
+	 * limit = 60%
+	 * If there is no Ytile plane enabled and
+	 * arbitrated display bandwidth > Xtile limit
+	 * Enable X-tile related WA
+	 */
+	if (y_tile_enabled && (display_bw_per > 20))
+		intel_state->wm_results.mem_wa = WATERMARK_WA_Y_TILED;
+	else {
+
+		if (dev_priv->memdev_info.rank_valid)
+			rank = dev_priv->memdev_info.rank;
+		else
+			rank = DRAM_DUAL_RANK; /* Assume we are dual rank */
+
+		if ((rank == DRAM_SINGLE_RANK) && (num_channel == 2))
+			x_tile_per = 35;
+		else
+			x_tile_per = 60;
+
+		if (display_bw_per > x_tile_per)
+			intel_state->wm_results.mem_wa = WATERMARK_WA_X_TILED;
+	}
+	return;
+
+exit:
+	intel_state->wm_results.mem_wa = WATERMARK_WA_NONE;
+}
+
+static void
 skl_copy_wm_for_pipe(struct skl_wm_values *dst,
 		     struct skl_wm_values *src,
 		     enum pipe pipe)
@@ -4101,6 +4244,8 @@  skl_compute_wm(struct drm_atomic_state *state)
 	/* Clear all dirty flags */
 	results->dirty_pipes = 0;
 
+	skl_set_memory_bandwidth_wm_wa(state);
+
 	ret = skl_include_affected_pipes(state);
 	if (ret)
 		return ret;