diff mbox

[13/17] drm/i915/icl: Enable 2nd DBuf slice only when needed

Message ID 20180123190536.11208-14-paulo.r.zanoni@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zanoni, Paulo R Jan. 23, 2018, 7:05 p.m. UTC
From: Mahesh Kumar <mahesh1.kumar@intel.com>

ICL has two slices of DBuf, each slice of size 1024 blocks.
We should not always enable slice-2. It should be enabled only if
display total required BW is > 12GBps OR more than 1 pipes are enabled.

Changes since V1:
 - typecast total_data_rate to u64 before multiplication to solve any
   possible overflow (Rodrigo)
 - fix where skl_wm_get_hw_state was memsetting ddb, resulting
   enabled_slices to become zero
 - Fix the logic of calculating ddb_size
Changes since V2:
 - If no-crtc is part of commit required_slices will have value "0",
   don't try to disable DBuf slice.

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c    | 12 +++++++
 drivers/gpu/drm/i915/intel_drv.h        |  6 ++++
 drivers/gpu/drm/i915/intel_pm.c         | 64 +++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_runtime_pm.c | 47 +++++++++++++++++++++---
 4 files changed, 110 insertions(+), 19 deletions(-)

Comments

James Ausmus Jan. 25, 2018, 10:56 p.m. UTC | #1
On Tue, Jan 23, 2018 at 05:05:32PM -0200, Paulo Zanoni wrote:
> From: Mahesh Kumar <mahesh1.kumar@intel.com>
> 
> ICL has two slices of DBuf, each slice of size 1024 blocks.
> We should not always enable slice-2. It should be enabled only if
> display total required BW is > 12GBps OR more than 1 pipes are enabled.
> 
> Changes since V1:
>  - typecast total_data_rate to u64 before multiplication to solve any
>    possible overflow (Rodrigo)
>  - fix where skl_wm_get_hw_state was memsetting ddb, resulting
>    enabled_slices to become zero
>  - Fix the logic of calculating ddb_size
> Changes since V2:
>  - If no-crtc is part of commit required_slices will have value "0",
>    don't try to disable DBuf slice.
> 
> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_display.c    | 12 +++++++
>  drivers/gpu/drm/i915/intel_drv.h        |  6 ++++
>  drivers/gpu/drm/i915/intel_pm.c         | 64 +++++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/intel_runtime_pm.c | 47 +++++++++++++++++++++---
>  4 files changed, 110 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index bad3b112ac3e..3eb2359c221b 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -12117,6 +12117,8 @@ static void skl_update_crtcs(struct drm_atomic_state *state)
>  	bool progress;
>  	enum pipe pipe;
>  	int i;
> +	uint8_t hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> +	uint8_t required_slices = intel_state->wm_results.ddb.enabled_slices;
>  
>  	const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {};
>  
> @@ -12125,6 +12127,11 @@ static void skl_update_crtcs(struct drm_atomic_state *state)
>  		if (new_crtc_state->active)
>  			entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb;
>  
> +	/* If 2nd DBuf slice required, enable it here */
> +	if (INTEL_GEN(dev_priv) >= 11 && required_slices &&
> +					 required_slices > hw_enabled_slices)
> +		icl_dbuf_slices_update(dev_priv, required_slices);
> +
>  	/*
>  	 * Whenever the number of active pipes changes, we need to make sure we
>  	 * update the pipes in the right order so that their ddb allocations
> @@ -12175,6 +12182,11 @@ static void skl_update_crtcs(struct drm_atomic_state *state)
>  			progress = true;
>  		}
>  	} while (progress);
> +
> +	/* If 2nd DBuf slice is no more required disable it */
> +	if (INTEL_GEN(dev_priv) >= 11 && required_slices &&
> +					 required_slices < hw_enabled_slices)
> +		icl_dbuf_slices_update(dev_priv, required_slices);
>  }
>  
>  static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index c5d6092aca41..d4639a161fe3 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -140,6 +140,10 @@
>  #define KHz(x) (1000 * (x))
>  #define MHz(x) KHz(1000 * (x))
>  
> +#define KBps(x) (1000 * (x))
> +#define MBps(x) KBps(1000 * (x))
> +#define GBps(x) ((uint64_t) 1000 * MBps((x)))
> +
>  /*
>   * Display related stuff
>   */
> @@ -1890,6 +1894,8 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
>  					enum intel_display_power_domain domain);
>  void intel_display_power_put(struct drm_i915_private *dev_priv,
>  			     enum intel_display_power_domain domain);
> +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
> +			    uint8_t req_slices);
>  
>  static inline void
>  assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e8d98857c208..d4cd631377da 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3767,9 +3767,42 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)
>  	return true;
>  }
>  
> +static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv,
> +				       const struct intel_crtc_state *cstate,
> +				       const unsigned int total_data_rate,
> +				       const int num_active,
> +				       struct skl_ddb_allocation *ddb)
> +{
> +	const struct drm_display_mode *adjusted_mode;
> +	u64 total_data_bw;
> +	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
> +
> +	WARN_ON(ddb_size == 0);
> +
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return ddb_size - 4; /* 4 blocks for bypass path allocation */
> +
> +	adjusted_mode = &cstate->base.adjusted_mode;
> +	total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode);
> +
> +	/*
> +	 * 12GB/s is maximum BW supported by single DBuf slice.
> +	 */
> +	if (total_data_bw >= GBps(12) || num_active > 1)
> +		ddb->enabled_slices = 2;
> +	else {
> +		ddb->enabled_slices = 1;
> +		ddb_size /= 2;
> +	}
> +
> +	return ddb_size;
> +}
> +
>  static void
>  skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
>  				   const struct intel_crtc_state *cstate,
> +				   const unsigned int total_data_rate,
> +				   struct skl_ddb_allocation *ddb,
>  				   struct skl_ddb_entry *alloc, /* out */
>  				   int *num_active /* out */)
>  {
> @@ -3792,11 +3825,8 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
>  	else
>  		*num_active = hweight32(dev_priv->active_crtcs);
>  
> -	ddb_size = INTEL_INFO(dev_priv)->ddb_size;
> -	WARN_ON(ddb_size == 0);
> -
> -	if (INTEL_GEN(dev_priv) < 11)
> -		ddb_size -= 4; /* 4 blocks for bypass path allocation */
> +	ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
> +				      *num_active, ddb);
>  
>  	/*
>  	 * If the state doesn't change the active CRTC's, then there's
> @@ -4235,7 +4265,14 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  		return 0;
>  	}
>  
> -	skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
> +	total_data_rate = skl_get_total_relative_data_rate(cstate,
> +							   plane_data_rate,
> +							   plane_y_data_rate);
> +	if (total_data_rate == 0)
> +		return 0;
> +
> +	skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb,
> +					   alloc, &num_active);
>  	alloc_size = skl_ddb_entry_size(alloc);
>  	if (alloc_size == 0)
>  		return 0;
> @@ -4270,12 +4307,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  	 *
>  	 * FIXME: we may not allocate every single block here.
>  	 */
> -	total_data_rate = skl_get_total_relative_data_rate(cstate,
> -							   plane_data_rate,
> -							   plane_y_data_rate);
> -	if (total_data_rate == 0)
> -		return 0;
> -
>  	start = alloc->start;
>  	for_each_plane_id_on_crtc(intel_crtc, plane_id) {
>  		unsigned int data_rate, y_data_rate;
> @@ -5068,7 +5099,6 @@ skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>  	       sizeof(dst->ddb.y_plane[pipe]));
>  	memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
>  	       sizeof(dst->ddb.plane[pipe]));
> -	dst->ddb.enabled_slices = src->ddb.enabled_slices;
>  }
>  
>  static void
> @@ -5381,8 +5411,12 @@ void skl_wm_get_hw_state(struct drm_device *dev)
>  		/* Fully recompute DDB on first atomic commit */
>  		dev_priv->wm.distrust_bios_wm = true;
>  	} else {
> -		/* Easy/common case; just sanitize DDB now if everything off */
> -		memset(ddb, 0, sizeof(*ddb));
> +		/*
> +		 * Easy/common case; just sanitize DDB now if everything off
> +		 * Keep dbuf slice info intact
> +		 */
> +		memset(ddb->plane, 0, sizeof(ddb->plane));
> +		memset(ddb->y_plane, 0, sizeof(ddb->y_plane));
>  	}
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 13c8dad95b84..a67a57738740 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -2610,10 +2610,49 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
>  		DRM_ERROR("DBuf power disable timeout!\n");
>  }
>  
> -/*
> - * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when
> - * needed and keep it disabled as much as possible.
> - */
> +static uint8_t intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
> +{
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 1;
> +	return 2;
> +}
> +
> +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
> +			    uint8_t req_slices)
> +{
> +	uint8_t hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> +	u32 val;
> +
> +	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
> +		DRM_ERROR("Invalid number of dbuf slices requested\n");
> +		return;
> +	}
> +
> +	if (req_slices == hw_enabled_slices)
> +		return;
> +
> +	val = I915_READ(DBUF_CTL_S2);
> +	if (req_slices > hw_enabled_slices) {
> +		I915_WRITE(DBUF_CTL_S2, val | DBUF_POWER_REQUEST);
> +		POSTING_READ(DBUF_CTL_S2);
> +
> +		udelay(10);
> +		if (!(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> +			DRM_ERROR("DBuf power enable timeout\n");
> +		else
> +			dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
> +	} else {
> +		I915_WRITE(DBUF_CTL_S2, val & ~DBUF_POWER_REQUEST);
> +		POSTING_READ(DBUF_CTL_S2);
> +
> +		udelay(10);
> +		if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> +			DRM_ERROR("DBuf power disable timeout!\n");
> +		else
> +			dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;

It feels a little ugly to be duplicating the Write->Posting
Read->Wait->Read->Error pattern across this function and
icl_dbuf_enable/disable below - not to mention that we have the same
pattern up above in gen9_dbuf_enable/disable. Maybe we should refactor
the gen9 versions in to helpers that take a slice number, and then reuse
those for ICL as well?


> +	}
> +}
> +
>  static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
>  {
>  	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
> -- 
> 2.14.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index bad3b112ac3e..3eb2359c221b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12117,6 +12117,8 @@  static void skl_update_crtcs(struct drm_atomic_state *state)
 	bool progress;
 	enum pipe pipe;
 	int i;
+	uint8_t hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
+	uint8_t required_slices = intel_state->wm_results.ddb.enabled_slices;
 
 	const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {};
 
@@ -12125,6 +12127,11 @@  static void skl_update_crtcs(struct drm_atomic_state *state)
 		if (new_crtc_state->active)
 			entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb;
 
+	/* If 2nd DBuf slice required, enable it here */
+	if (INTEL_GEN(dev_priv) >= 11 && required_slices &&
+					 required_slices > hw_enabled_slices)
+		icl_dbuf_slices_update(dev_priv, required_slices);
+
 	/*
 	 * Whenever the number of active pipes changes, we need to make sure we
 	 * update the pipes in the right order so that their ddb allocations
@@ -12175,6 +12182,11 @@  static void skl_update_crtcs(struct drm_atomic_state *state)
 			progress = true;
 		}
 	} while (progress);
+
+	/* If 2nd DBuf slice is no more required disable it */
+	if (INTEL_GEN(dev_priv) >= 11 && required_slices &&
+					 required_slices < hw_enabled_slices)
+		icl_dbuf_slices_update(dev_priv, required_slices);
 }
 
 static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index c5d6092aca41..d4639a161fe3 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -140,6 +140,10 @@ 
 #define KHz(x) (1000 * (x))
 #define MHz(x) KHz(1000 * (x))
 
+#define KBps(x) (1000 * (x))
+#define MBps(x) KBps(1000 * (x))
+#define GBps(x) ((uint64_t) 1000 * MBps((x)))
+
 /*
  * Display related stuff
  */
@@ -1890,6 +1894,8 @@  bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
 					enum intel_display_power_domain domain);
 void intel_display_power_put(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
+void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
+			    uint8_t req_slices);
 
 static inline void
 assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e8d98857c208..d4cd631377da 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3767,9 +3767,42 @@  bool intel_can_enable_sagv(struct drm_atomic_state *state)
 	return true;
 }
 
+static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv,
+				       const struct intel_crtc_state *cstate,
+				       const unsigned int total_data_rate,
+				       const int num_active,
+				       struct skl_ddb_allocation *ddb)
+{
+	const struct drm_display_mode *adjusted_mode;
+	u64 total_data_bw;
+	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
+
+	WARN_ON(ddb_size == 0);
+
+	if (INTEL_GEN(dev_priv) < 11)
+		return ddb_size - 4; /* 4 blocks for bypass path allocation */
+
+	adjusted_mode = &cstate->base.adjusted_mode;
+	total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode);
+
+	/*
+	 * 12GB/s is maximum BW supported by single DBuf slice.
+	 */
+	if (total_data_bw >= GBps(12) || num_active > 1)
+		ddb->enabled_slices = 2;
+	else {
+		ddb->enabled_slices = 1;
+		ddb_size /= 2;
+	}
+
+	return ddb_size;
+}
+
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
 				   const struct intel_crtc_state *cstate,
+				   const unsigned int total_data_rate,
+				   struct skl_ddb_allocation *ddb,
 				   struct skl_ddb_entry *alloc, /* out */
 				   int *num_active /* out */)
 {
@@ -3792,11 +3825,8 @@  skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
 	else
 		*num_active = hweight32(dev_priv->active_crtcs);
 
-	ddb_size = INTEL_INFO(dev_priv)->ddb_size;
-	WARN_ON(ddb_size == 0);
-
-	if (INTEL_GEN(dev_priv) < 11)
-		ddb_size -= 4; /* 4 blocks for bypass path allocation */
+	ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
+				      *num_active, ddb);
 
 	/*
 	 * If the state doesn't change the active CRTC's, then there's
@@ -4235,7 +4265,14 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 		return 0;
 	}
 
-	skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
+	total_data_rate = skl_get_total_relative_data_rate(cstate,
+							   plane_data_rate,
+							   plane_y_data_rate);
+	if (total_data_rate == 0)
+		return 0;
+
+	skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb,
+					   alloc, &num_active);
 	alloc_size = skl_ddb_entry_size(alloc);
 	if (alloc_size == 0)
 		return 0;
@@ -4270,12 +4307,6 @@  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 	 *
 	 * FIXME: we may not allocate every single block here.
 	 */
-	total_data_rate = skl_get_total_relative_data_rate(cstate,
-							   plane_data_rate,
-							   plane_y_data_rate);
-	if (total_data_rate == 0)
-		return 0;
-
 	start = alloc->start;
 	for_each_plane_id_on_crtc(intel_crtc, plane_id) {
 		unsigned int data_rate, y_data_rate;
@@ -5068,7 +5099,6 @@  skl_copy_wm_for_pipe(struct skl_wm_values *dst,
 	       sizeof(dst->ddb.y_plane[pipe]));
 	memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
 	       sizeof(dst->ddb.plane[pipe]));
-	dst->ddb.enabled_slices = src->ddb.enabled_slices;
 }
 
 static void
@@ -5381,8 +5411,12 @@  void skl_wm_get_hw_state(struct drm_device *dev)
 		/* Fully recompute DDB on first atomic commit */
 		dev_priv->wm.distrust_bios_wm = true;
 	} else {
-		/* Easy/common case; just sanitize DDB now if everything off */
-		memset(ddb, 0, sizeof(*ddb));
+		/*
+		 * Easy/common case; just sanitize DDB now if everything off
+		 * Keep dbuf slice info intact
+		 */
+		memset(ddb->plane, 0, sizeof(ddb->plane));
+		memset(ddb->y_plane, 0, sizeof(ddb->y_plane));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 13c8dad95b84..a67a57738740 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2610,10 +2610,49 @@  static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
 		DRM_ERROR("DBuf power disable timeout!\n");
 }
 
-/*
- * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when
- * needed and keep it disabled as much as possible.
- */
+static uint8_t intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) < 11)
+		return 1;
+	return 2;
+}
+
+void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
+			    uint8_t req_slices)
+{
+	uint8_t hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
+	u32 val;
+
+	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
+		DRM_ERROR("Invalid number of dbuf slices requested\n");
+		return;
+	}
+
+	if (req_slices == hw_enabled_slices)
+		return;
+
+	val = I915_READ(DBUF_CTL_S2);
+	if (req_slices > hw_enabled_slices) {
+		I915_WRITE(DBUF_CTL_S2, val | DBUF_POWER_REQUEST);
+		POSTING_READ(DBUF_CTL_S2);
+
+		udelay(10);
+		if (!(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
+			DRM_ERROR("DBuf power enable timeout\n");
+		else
+			dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
+	} else {
+		I915_WRITE(DBUF_CTL_S2, val & ~DBUF_POWER_REQUEST);
+		POSTING_READ(DBUF_CTL_S2);
+
+		udelay(10);
+		if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
+			DRM_ERROR("DBuf power disable timeout!\n");
+		else
+			dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
+	}
+}
+
 static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);