[v5] drm/i915: Enable second dbuf slice for ICL and TGL
diff mbox series

Message ID 20191107212234.21815-1-stanislav.lisovskiy@intel.com
State New
Headers show
Series
  • [v5] drm/i915: Enable second dbuf slice for ICL and TGL
Related show

Commit Message

Stanislav Lisovskiy Nov. 7, 2019, 9:22 p.m. UTC
Also implemented algorithm for choosing DBuf slice configuration
based on active pipes, pipe ratio as stated in BSpec 12716.

Now pipe allocation still stays proportional to pipe width as before,
however within allowed DBuf slice for this particular configuration.

v2: Remove unneeded check from commit as ddb enabled slices might
    now differ from hw state.

v3: - Added new field "supported_slices" to ddb to separate max
      supported slices vs currently enabled, to avoid confusion.
    - Removed obsolete comments and code related to DBuf(Matthew Roper).
    - Some code style and long line removal(Matthew Roper).
    - Added WARN_ON to new ddb range offset calc function(Matthew Roper).
    - Removed platform specific call to calc pipe ratio from ddb
      allocation function and fixed the return value(Matthew Roper)
    - Refactored DBUF slice allocation table to improve readability
    - Added DBUF slice allocation for TGL as well.
    - ICL(however not TGL) seems to voluntarily enable second DBuf slice
      after pm suspend/resume causing a mismatch failure, because we
      update DBuf slices only if we do a modeset, however this check
      is done always. Fixed it to be done only when modeset for ICL.

v4: - Now enabled slices is not actually a number, but a bitmask,
      because we might need to enable second slice only and number
      of slices would still 1 and that current code doesn't allow.
    - Remove redundant duplicate code to have some unified way of
      enabling dbuf slices instead of hardcoding.

v5: - Fix failing gen9_assert_dbuf_enabled as it was naively thinking
      that we have only one DBUF_CTL slice. Now another version for
      gen11 will check both slices as only second one can be enabled,
      to keep CI happy.

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Cc: Matthew Roper <matthew.d.roper@intel.com>
Cc: Ville Syrjälä <ville.syrjala@intel.com>
Cc: James Ausmus <james.ausmus@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c  |  22 +-
 .../drm/i915/display/intel_display_power.c    | 103 +++--
 .../drm/i915/display/intel_display_power.h    |   2 +
 drivers/gpu/drm/i915/i915_drv.c               |   5 +
 drivers/gpu/drm/i915/i915_drv.h               |   3 +-
 drivers/gpu/drm/i915/i915_reg.h               |   2 +
 drivers/gpu/drm/i915/intel_pm.c               | 391 ++++++++++++++++--
 7 files changed, 441 insertions(+), 87 deletions(-)

Comments

Matt Roper Nov. 8, 2019, 12:52 a.m. UTC | #1
On Thu, Nov 07, 2019 at 11:22:34PM +0200, Stanislav Lisovskiy wrote:
> Also implemented algorithm for choosing DBuf slice configuration
> based on active pipes, pipe ratio as stated in BSpec 12716.
> 
> Now pipe allocation still stays proportional to pipe width as before,
> however within allowed DBuf slice for this particular configuration.
> 
> v2: Remove unneeded check from commit as ddb enabled slices might
>     now differ from hw state.
> 
> v3: - Added new field "supported_slices" to ddb to separate max
>       supported slices vs currently enabled, to avoid confusion.
>     - Removed obsolete comments and code related to DBuf(Matthew Roper).
>     - Some code style and long line removal(Matthew Roper).
>     - Added WARN_ON to new ddb range offset calc function(Matthew Roper).
>     - Removed platform specific call to calc pipe ratio from ddb
>       allocation function and fixed the return value(Matthew Roper)
>     - Refactored DBUF slice allocation table to improve readability
>     - Added DBUF slice allocation for TGL as well.
>     - ICL(however not TGL) seems to voluntarily enable second DBuf slice
>       after pm suspend/resume causing a mismatch failure, because we
>       update DBuf slices only if we do a modeset, however this check
>       is done always. Fixed it to be done only when modeset for ICL.
> 
> v4: - Now enabled slices is not actually a number, but a bitmask,
>       because we might need to enable second slice only and number
>       of slices would still 1 and that current code doesn't allow.
>     - Remove redundant duplicate code to have some unified way of
>       enabling dbuf slices instead of hardcoding.
> 
> v5: - Fix failing gen9_assert_dbuf_enabled as it was naively thinking
>       that we have only one DBUF_CTL slice. Now another version for
>       gen11 will check both slices as only second one can be enabled,
>       to keep CI happy.
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> Cc: Matthew Roper <matthew.d.roper@intel.com>
> Cc: Ville Syrjälä <ville.syrjala@intel.com>
> Cc: James Ausmus <james.ausmus@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_display.c  |  22 +-
>  .../drm/i915/display/intel_display_power.c    | 103 +++--
>  .../drm/i915/display/intel_display_power.h    |   2 +
>  drivers/gpu/drm/i915/i915_drv.c               |   5 +
>  drivers/gpu/drm/i915/i915_drv.h               |   3 +-
>  drivers/gpu/drm/i915/i915_reg.h               |   2 +
>  drivers/gpu/drm/i915/intel_pm.c               | 391 ++++++++++++++++--
>  7 files changed, 441 insertions(+), 87 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index 876fc25968bf..ee63da18772a 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -13386,8 +13386,9 @@ static void verify_wm_state(struct intel_crtc *crtc,
>  	sw_ddb = &dev_priv->wm.skl_hw.ddb;
>  
>  	if (INTEL_GEN(dev_priv) >= 11 &&
> -	    hw->ddb.enabled_slices != sw_ddb->enabled_slices)
> -		DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
> +	    hw->ddb.enabled_slices != sw_ddb->enabled_slices &&
> +	    needs_modeset(new_crtc_state))
> +		DRM_ERROR("mismatch in DBUF Slices (expected %x, got %x)\n",
>  			  sw_ddb->enabled_slices,
>  			  hw->ddb.enabled_slices);

Is this related to your changelog comment above?

    - ICL(however not TGL) seems to voluntarily enable second DBuf slice
      after pm suspend/resume causing a mismatch failure, because we
      update DBuf slices only if we do a modeset, however this check
      is done always. Fixed it to be done only when modeset for ICL.

It looks like you added an icl_dbuf_slices_restore() function that tries
to sanitize the dbuf programming on resume; is that not sufficient to
address this?

>  
> @@ -14613,16 +14614,21 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
>  	int i;
>  	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
>  	u8 required_slices = state->wm_results.ddb.enabled_slices;
> -	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
> +	u8 slices_union = hw_enabled_slices | required_slices;
> +	u8 slices_intersection = slices_union & required_slices;

Isn't this the same as "slices_intersection = required_slices?"

>  
> +	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};

Did you mean to move this line (and add a space before it but not
after)?

>  	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i)
>  		/* ignore allocations for crtc's that have been turned off. */
>  		if (new_crtc_state->hw.active)
>  			entries[i] = old_crtc_state->wm.skl.ddb;
>  
> -	/* If 2nd DBuf slice required, enable it here */
> -	if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices)
> -		icl_dbuf_slices_update(dev_priv, required_slices);
> +	DRM_DEBUG_KMS("DBuf req slices %d hw slices %d\n",
> +		      required_slices, hw_enabled_slices);
> +
> +	/* If some other DBuf slice required, enable it here */

Minor nitpick, but it might be more clear to write this as "Enable dbuf
slices necessary to support both the old configuration and new
configuration."  I.e., that helps clarify why we don't just jump
straight to the final dbuf configuration here.

> +	if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices)
> +		icl_dbuf_slices_update(dev_priv, slices_union);
>  
>  	/*
>  	 * Whenever the number of active pipes changes, we need to make sure we
> @@ -14682,8 +14688,8 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
>  	} while (progress);
>  
>  	/* If 2nd DBuf slice is no more required disable it */

A corresponding change could be made to this comment (and removing the
"2nd" terminology would future-proof the comment in case a future
platform has more than just two slices).

> -	if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices)
> -		icl_dbuf_slices_update(dev_priv, required_slices);
> +	if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices)
> +		icl_dbuf_slices_update(dev_priv, slices_intersection);

As noted above, slices_intersection is equivalent to required_slices.

>  }
>  
>  static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
> index ce1b64f4dd44..c04ebb19bb4f 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.c
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
> @@ -1040,6 +1040,21 @@ static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
>  	     "Unexpected DBuf power power state (0x%08x)\n", tmp);
>  }
>  
> +static void gen11_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
> +{
> +	u32 tmp = I915_READ(DBUF_CTL_S1);
> +	u32 tmp2 = I915_READ(DBUF_CTL_S2);
> +	bool s1_enabled = (tmp & (DBUF_POWER_STATE | DBUF_POWER_REQUEST)) ==
> +			  (DBUF_POWER_STATE | DBUF_POWER_REQUEST);
> +
> +	bool s2_enabled = (tmp2 & (DBUF_POWER_STATE | DBUF_POWER_REQUEST)) ==
> +			  (DBUF_POWER_STATE | DBUF_POWER_REQUEST);
> +
> +	WARN(!s1_enabled && !s2_enabled,
> +	     "Unexpected DBuf power power state (0x%08x, 0x%08x)\n", tmp,
> +								     tmp2);
> +}
> +
>  static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_cdclk_state cdclk_state = {};
> @@ -1055,7 +1070,10 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
>  	/* Can't read out voltage_level so can't use intel_cdclk_changed() */
>  	WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw, &cdclk_state));
>  
> -	gen9_assert_dbuf_enabled(dev_priv);
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		gen11_assert_dbuf_enabled(dev_priv);
> +	else
> +		gen9_assert_dbuf_enabled(dev_priv);
>  
>  	if (IS_GEN9_LP(dev_priv))
>  		bxt_verify_ddi_phy_power_wells(dev_priv);
> @@ -4254,31 +4272,53 @@ static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
>  	intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
>  }
>  
> -static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
> +u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)

I realize your patch didn't introduce it, and it really doesn't matter,
but when I see u8 or similar used as a return type I sort of expect the
value to be a bitmask.  When it's just a count I don't see much reason
to not just use a vanilla 'int,' even when the expected return values
are likely to be small.  But it doesn't really matter, so I'll leave it
up to you whether you want to change it.

Of course if we move this into intel_device_info (as I suggest farther
down), we could just drop this function completely.

>  {
>  	if (INTEL_GEN(dev_priv) < 11)
>  		return 1;
>  	return 2;
>  }
>  
> +void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv)
> +{
> +	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> +
> +	icl_dbuf_slices_update(dev_priv, hw_enabled_slices);
> +}
> +
>  void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>  			    u8 req_slices)
>  {
> -	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> -	bool ret;
> +	bool ret = true;
> +	int i;
> +	int max_slices = intel_dbuf_max_slices(dev_priv);
>  
> -	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
> +	if (hweight8(req_slices) > intel_dbuf_max_slices(dev_priv)) {
>  		DRM_ERROR("Invalid number of dbuf slices requested\n");
>  		return;
>  	}
>  
> -	if (req_slices == hw_enabled_slices || req_slices == 0)
> -		return;
> +	DRM_DEBUG_KMS("Updating dbuf slices to %x\n", req_slices);
>  
> -	if (req_slices > hw_enabled_slices)
> -		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
> -	else
> -		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false);
> +	for (i = 0; i < max_slices; i++) {
> +		int slice_bit = 1 << i;

BIT(i) would be slightly cleaner.

> +		bool slice_set = ((slice_bit) & req_slices) != 0;

Unnecessary parens.

> +
> +		switch (slice_bit) {
> +		case DBUF_S1_BIT:
> +			ret = ret && intel_dbuf_slice_set(dev_priv,
> +							  DBUF_CTL_S1,
> +							  slice_set);
> +			break;
> +		case DBUF_S2_BIT:
> +			ret = ret && intel_dbuf_slice_set(dev_priv,
> +							  DBUF_CTL_S2,
> +							  slice_set);
> +			break;
> +		default:
> +			WARN(1, "Unsupported DBUF slice");

Should probably use MISSING_CASE(slice_bit) here.

> +		}
> +	}
>  
>  	if (ret)
>  		dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;

I'm not sure how valuable the success-tracking in this function is.  If
we fail to power up/down the slice something went really wrong and we're
probably going to be in an inconsistent state no matter what (possibly
allocating DDB blocks to a slice that never powered up).  We already
print warnings in intel_dbuf_slice_set() if we fail, so I'd be inclined
to just set enabled slices unconditionally here and hope that it was
just a matter of the hardware being extra slow to honor our request (so
the slice would actually power up/down with a little more time).  We're
already in the atomic commit phase at this point, so there isn't much we
can really do as far as failing gracefully in cases where the slice
didn't power up.

> @@ -4286,40 +4326,21 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>  
>  static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
>  {
> -	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
> -	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) | DBUF_POWER_REQUEST);
> -	POSTING_READ(DBUF_CTL_S2);
> -
> -	udelay(10);
> -
> -	if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
> -	    !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> -		DRM_ERROR("DBuf power enable timeout\n");
> -	else
> -		/*
> -		 * FIXME: for now pretend that we only have 1 slice, see
> -		 * intel_enabled_dbuf_slices_num().
> -		 */
> -		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> +	/*
> +	 * For now pretend that we only have 1 slice, we will
> +	 * figure out later which slices we have and what we need.
> +	 */

I'd write this comment more as "Just power up slice 1 for now; we can
enable other slice(s) later as necessary."

> +	dev_priv->wm.skl_hw.ddb.enabled_slices = 1;

Since this is a bitmask, using DBUF_S1_BIT here would add clarity.

> +	icl_dbuf_slices_restore(dev_priv);
>  }
>  
>  static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
>  {
> -	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) & ~DBUF_POWER_REQUEST);
> -	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) & ~DBUF_POWER_REQUEST);
> -	POSTING_READ(DBUF_CTL_S2);
> -
> -	udelay(10);
> -
> -	if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
> -	    (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> -		DRM_ERROR("DBuf power disable timeout!\n");
> -	else
> -		/*
> -		 * FIXME: for now pretend that the first slice is always
> -		 * enabled, see intel_enabled_dbuf_slices_num().
> -		 */
> -		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> +	/*
> +	 * Disable all slices
> +	 */
> +	dev_priv->wm.skl_hw.ddb.enabled_slices = 0;
> +	icl_dbuf_slices_restore(dev_priv);
>  }
>  
>  static void icl_mbus_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
> index 1da04f3e0fb3..4d5bb0accef7 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> @@ -311,8 +311,10 @@ intel_display_power_put_async(struct drm_i915_private *i915,
>  	for ((wf) = intel_display_power_get((i915), (domain)); (wf); \
>  	     intel_display_power_put_async((i915), (domain), (wf)), (wf) = 0)
>  
> +u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv);
>  void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
>  			    u8 req_slices);
> +void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv);
>  
>  void chv_phy_powergate_lanes(struct intel_encoder *encoder,
>  			     bool override, unsigned int mask);
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 82e4e6bf08c3..0307aec2c928 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -51,6 +51,7 @@
>  #include "display/intel_bw.h"
>  #include "display/intel_cdclk.h"
>  #include "display/intel_display_types.h"
> +#include "display/intel_display_power.h"
>  #include "display/intel_dp.h"
>  #include "display/intel_fbdev.h"
>  #include "display/intel_hotplug.h"
> @@ -2588,6 +2589,10 @@ static int intel_runtime_resume(struct device *kdev)
>  	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
>  		ret = vlv_resume_prepare(dev_priv, true);
>  
> +	/* Weird hack to fix ICL hardware bug, as it resets DBUF slices reg */
> +	if (INTEL_GEN(dev_priv) == 11)
> +		icl_dbuf_slices_restore(dev_priv);
> +
>  	intel_uncore_runtime_resume(&dev_priv->uncore);
>  
>  	intel_runtime_pm_enable_interrupts(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7e0f67babe20..78bd010cb59a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -804,7 +804,8 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
>  }
>  
>  struct skl_ddb_allocation {
> -	u8 enabled_slices; /* GEN11 has configurable 2 slices */
> +	u8 enabled_slices;   /* Bitmask of currently enabled DBuf slices */
> +	u8 supported_slices; /* GEN 11 supports 2 slices */

This might fit better in intel_device_info, possibly next to the
definition of ddb_size?


>  };
>  
>  struct skl_ddb_values {
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index a607ea520829..7eca2adbb281 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7734,6 +7734,8 @@ enum {
>  #define DISP_ARB_CTL2	_MMIO(0x45004)
>  #define  DISP_DATA_PARTITION_5_6	(1 << 6)
>  #define  DISP_IPC_ENABLE		(1 << 3)
> +#define DBUF_S1_BIT			1
> +#define DBUF_S2_BIT			2

These should probably be BIT(0) and BIT(1) for clarity.  Alternatively,
you could parameterize the macro as something like

  #define DBUF_SLICE(i)   BIT(i-1)

>  #define DBUF_CTL	_MMIO(0x45008)
>  #define DBUF_CTL_S1	_MMIO(0x45008)
>  #define DBUF_CTL_S2	_MMIO(0x44FE8)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 2d389e437e87..ebe537a47879 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3586,24 +3586,20 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>  	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
>  }
>  
> -static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
> +static u8 intel_enabled_dbuf_slices(struct drm_i915_private *dev_priv)
>  {
> -	u8 enabled_slices;
> -
> -	/* Slice 1 will always be enabled */
> -	enabled_slices = 1;
> +	u8 enabled_slices = 0;
>  
>  	/* Gen prior to GEN11 have only one DBuf slice */
>  	if (INTEL_GEN(dev_priv) < 11)
> -		return enabled_slices;
> +		return 1;
>  
> -	/*
> -	 * FIXME: for now we'll only ever use 1 slice; pretend that we have
> -	 * only that 1 slice enabled until we have a proper way for on-demand
> -	 * toggling of the second slice.
> -	 */
> -	if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> -		enabled_slices++;
> +	/* Check if second DBuf slice is enabled */
> +	if (I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE)
> +		enabled_slices |= 1;
> +
> +	if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> +		enabled_slices |= 2;

These should be "enabled_slices |= DBUF_S#_BIT" for clarity.

>  
>  	return enabled_slices;
>  }
> @@ -3812,36 +3808,44 @@ static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
>  			      const int num_active,
>  			      struct skl_ddb_allocation *ddb)
>  {
> -	const struct drm_display_mode *adjusted_mode;
> -	u64 total_data_bw;
>  	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
>  
>  	WARN_ON(ddb_size == 0);
> +	ddb->supported_slices = intel_dbuf_max_slices(dev_priv);
>  
>  	if (INTEL_GEN(dev_priv) < 11)
>  		return ddb_size - 4; /* 4 blocks for bypass path allocation */
>  
> -	adjusted_mode = &crtc_state->hw.adjusted_mode;
> -	total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
> +	return ddb_size;
> +}
>  
> -	/*
> -	 * 12GB/s is maximum BW supported by single DBuf slice.
> -	 *
> -	 * FIXME dbuf slice code is broken:
> -	 * - must wait for planes to stop using the slice before powering it off
> -	 * - plane straddling both slices is illegal in multi-pipe scenarios
> -	 * - should validate we stay within the hw bandwidth limits
> -	 */
> -	if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
> -		ddb->enabled_slices = 2;
> -	} else {
> -		ddb->enabled_slices = 1;
> -		ddb_size /= 2;
> -	}
> +/*
> + * Calculate initial DBuf slice offset, based on slice size
> + * and mask(i.e if slice size is 1024 and second slice is enabled
> + * offset would be 1024)
> + */
> +static u32 skl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
> +					   u32 slice_size, u32 ddb_size)
> +{
> +	u32 offset = 0;
>  
> -	return ddb_size;
> +	if (!dbuf_slice_mask)
> +		return 0;
> +
> +	while (!(dbuf_slice_mask & 1)) {
> +		dbuf_slice_mask >>= 1;
> +		offset += slice_size;
> +		if (offset >= ddb_size)
> +			break;
> +	}
> +	WARN_ON(offset >= ddb_size);
> +	return offset;

I think we could simplify this function with ffs():

    static u32 skl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
                                               u32 slice_size, u32 ddb_size)
    {
            u32 offset = ffs(dbuf_slice_mask) * slice_size;

            WARN_ON(offset >= ddb_size);
            return offset;
    }

or just inline it into the callsite now that it's short enough.

>  }
>  
> +static u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> +				      int pipe, u32 active_pipes,
> +				      const struct intel_crtc_state *crtc_state);
> +
>  static void
>  skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  				   const struct intel_crtc_state *crtc_state,
> @@ -3857,7 +3861,14 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  	u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
>  	enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
>  	u16 ddb_size;
> +	u32 ddb_range_size;
>  	u32 i;
> +	u32 dbuf_slice_mask;
> +	u32 active_pipes;
> +	u32 offset;
> +	u32 slice_size;
> +	u32 total_slice_mask;
> +	u32 start, end;
>  
>  	if (WARN_ON(!state) || !crtc_state->hw.active) {
>  		alloc->start = 0;
> @@ -3866,14 +3877,23 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  		return;
>  	}
>  
> -	if (intel_state->active_pipe_changes)
> +	if (intel_state->active_pipe_changes) {
>  		*num_active = hweight8(intel_state->active_pipes);
> -	else
> +		active_pipes = intel_state->active_pipes;
> +	} else {
>  		*num_active = hweight8(dev_priv->active_pipes);
> +		active_pipes = dev_priv->active_pipes;
> +	}

At the moment this is still safe (any changes to active_pipes requires
grabbing the locks on *all* CRTCs on gen9+), but once we land your
dbuf support, we should come back and make the locking more fine-grained
since at that point I don't think there should be a need for a modeset
that affects one slice to block modesets against CRTCs using other
slices.

>  
>  	ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate,
>  				      *num_active, ddb);
>  
> +	DRM_DEBUG_KMS("Got total ddb size %d\n", ddb_size);
> +
> +	slice_size = ddb_size / ddb->supported_slices;
> +
> +	DRM_DEBUG_KMS("Got DBuf slice size %d\n", slice_size);
> +
>  	/*
>  	 * If the state doesn't change the active CRTC's or there is no
>  	 * modeset request, then there's no need to recalculate;
> @@ -3891,20 +3911,73 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  		return;
>  	}
>  
> +	/*
> +	 * Get allowed DBuf slices for correspondent pipe and platform.
> +	 */
> +	dbuf_slice_mask = i915_get_allowed_dbuf_mask(dev_priv, for_pipe,
> +						     active_pipes, crtc_state);
> +
> +	DRM_DEBUG_KMS("DBuf slice mask %x pipe %d active pipes %x\n",
> +		      dbuf_slice_mask,
> +		      for_pipe, active_pipes);
> +
> +	/*
> +	 * Figure out at which DBuf slice we start, i.e if we start at Dbuf S2
> +	 * and slice size is 1024, the offset would be 1024
> +	 */
> +	offset = skl_get_first_dbuf_slice_offset(dbuf_slice_mask,
> +						 slice_size, ddb_size);
> +
> +	/*
> +	 * Figure out total size of allowed DBuf slices, which is basically
> +	 * a number of allowed slices for that pipe multiplied by slice size.
> +	 * We might still have some dbuf slices disabled in case if those
> +	 * are not needed based on bandwidth requirements and num_active pipes,
> +	 * so stick to real ddb size if it happens to be less. Inside of this
> +	 * range ddb entries are still allocated in proportion to display width.
> +	 */
> +	ddb_range_size = min(hweight8(dbuf_slice_mask) * slice_size,
> +			     (unsigned int)ddb_size);

Isn't ddb_size the total size of the ddb assuming all slices are
enabled?  I don't see how that could be less than mask * slice_size?


Matt

> +
>  	/*
>  	 * Watermark/ddb requirement highly depends upon width of the
>  	 * framebuffer, So instead of allocating DDB equally among pipes
>  	 * distribute DDB based on resolution/width of the display.
>  	 */
> +	total_slice_mask = dbuf_slice_mask;
>  	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
>  		const struct drm_display_mode *adjusted_mode =
>  			&crtc_state->hw.adjusted_mode;
>  		enum pipe pipe = crtc->pipe;
>  		int hdisplay, vdisplay;
> +		u32 pipe_dbuf_slice_mask = \
> +			i915_get_allowed_dbuf_mask(dev_priv,
> +						pipe,
> +						active_pipes,
> +						crtc_state);
>  
>  		if (!crtc_state->hw.enable)
>  			continue;
>  
> +		/*
> +		 * According to BSpec pipe can share one dbuf slice with another
> +		 * pipes or pipe can use multiple dbufs, in both cases we
> +		 * account for other pipes only if they have exactly same mask.
> +		 * However we need to account how many slices we should enable
> +		 * in total.
> +		 */
> +		total_slice_mask |= pipe_dbuf_slice_mask;
> +
> +		/*
> +		 * Do not account pipes using other slice sets
> +		 * luckily as of current BSpec slice sets do not partially
> +		 * intersect(pipes share either same one slice or same slice set
> +		 * i.e no partial intersection), so it is enough to check for
> +		 * equality for now.
> +		 */
> +		if (dbuf_slice_mask != pipe_dbuf_slice_mask)
> +			continue;
> +
>  		drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
>  		total_width += hdisplay;
>  
> @@ -3914,8 +3987,19 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
>  			pipe_width = hdisplay;
>  	}
>  
> -	alloc->start = ddb_size * width_before_pipe / total_width;
> -	alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
> +	ddb->enabled_slices = total_slice_mask;
> +
> +	start = ddb_range_size * width_before_pipe / total_width;
> +	end = ddb_range_size * (width_before_pipe + pipe_width) / total_width;
> +
> +	alloc->start = offset + start;
> +	alloc->end = offset + end;
> +
> +	DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe,
> +		      alloc->start, alloc->end);
> +	DRM_DEBUG_KMS("Enabled ddb slices mask %x num supported %d\n",
> +		      ddb->enabled_slices,
> +		      ddb->supported_slices);
>  }
>  
>  static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
> @@ -4036,7 +4120,8 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
>  void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
>  			  struct skl_ddb_allocation *ddb /* out */)
>  {
> -	ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
> +	ddb->enabled_slices = intel_enabled_dbuf_slices(dev_priv);
> +	DRM_DEBUG_KMS("Got hw dbuf slices mask %x\n", ddb->enabled_slices);
>  }
>  
>  /*
> @@ -4164,6 +4249,238 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
>  	return total_data_rate;
>  }
>  
> +uint_fixed_16_16_t
> +skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
> +{
> +	u32 src_w, src_h, dst_w, dst_h;
> +	uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
> +	uint_fixed_16_16_t downscale_h, downscale_w;
> +	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
> +
> +	src_w = crtc_state->pipe_src_w;
> +	src_h = crtc_state->pipe_src_h;
> +	dst_w = adjusted_mode->crtc_hdisplay;
> +	dst_h = adjusted_mode->crtc_vdisplay;
> +
> +	fp_w_ratio = div_fixed16(src_w, dst_w);
> +	fp_h_ratio = div_fixed16(src_h, dst_h);
> +	downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
> +	downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
> +
> +	return mul_fixed16(downscale_w, downscale_h);
> +}
> +
> +uint_fixed_16_16_t
> +icl_get_pipe_ratio(const struct intel_crtc_state *crtc_state)
> +{
> +	struct drm_plane *plane;
> +	const struct drm_plane_state *drm_plane_state;
> +	uint_fixed_16_16_t pipe_downscale, pipe_ratio;
> +	uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> +
> +	if (!crtc_state->uapi.enable)
> +		return max_downscale;
> +
> +	drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->uapi) {
> +		uint_fixed_16_16_t plane_downscale;
> +		const struct intel_plane_state *plane_state =
> +			to_intel_plane_state(drm_plane_state);
> +
> +		if (!intel_wm_plane_visible(crtc_state, plane_state))
> +			continue;
> +
> +		plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state);
> +
> +		DRM_DEBUG_KMS("Plane %d downscale amount %d.%d\n",
> +			      to_intel_plane(plane)->id,
> +			      plane_downscale.val >> 16,
> +			      plane_downscale.val & 0xffff);
> +
> +		max_downscale = max_fixed16(plane_downscale, max_downscale);
> +	}
> +
> +
> +	pipe_downscale = skl_pipe_downscale_amount(crtc_state);
> +
> +	DRM_DEBUG_KMS("Pipe %d downscale amount %d.%d\n",
> +		       crtc->pipe, pipe_downscale.val >> 16,
> +		       pipe_downscale.val & 0xffff);
> +
> +	pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
> +
> +	/* Convert result to percentage */
> +	pipe_ratio = u32_to_fixed16(100 * div_round_up_u32_fixed16(1, pipe_downscale));
> +
> +	DRM_DEBUG_KMS("Pipe %d ratio %d.%d\n", crtc->pipe,
> +		      pipe_ratio.val >> 16, pipe_ratio.val & 0xffff);
> +
> +	return pipe_ratio;
> +}
> +
> +struct dbuf_slice_conf_entry {
> +	u32 dbuf_mask[I915_MAX_PIPES];
> +	u32 active_pipes;
> +};
> +
> +
> +#define ICL_PIPE_A_DBUF_SLICES(DBuf1)  \
> +	{ { DBuf1, 0, 0, 0 }, BIT(PIPE_A) }
> +#define ICL_PIPE_B_DBUF_SLICES(DBuf1)  \
> +	{ { 0, DBuf1, 0, 0 }, BIT(PIPE_B) }
> +#define ICL_PIPE_C_DBUF_SLICES(DBuf1)  \
> +	{ { 0, 0, DBuf1, 0 }, BIT(PIPE_C) }
> +#define ICL_PIPE_D_DBUF_SLICES(DBuf1)  \
> +	{ { 0, 0, 0, DBuf1 }, BIT(PIPE_D) }
> +#define ICL_PIPE_AB_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { DBuf1, DBuf2, 0, 0 }, BIT(PIPE_A) | BIT(PIPE_B) }
> +#define ICL_PIPE_BC_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { 0, DBuf1, DBuf2, 0 }, BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_BD_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { 0, DBuf1, 0, DBuf2 }, BIT(PIPE_B) | BIT(PIPE_D) }
> +#define ICL_PIPE_AC_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { DBuf1, 0, DBuf2, 0 }, BIT(PIPE_A) | BIT(PIPE_C) }
> +#define ICL_PIPE_AD_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { DBuf1, 0, 0, DBuf2 }, BIT(PIPE_A) | BIT(PIPE_D) }
> +#define ICL_PIPE_CD_DBUF_SLICES(DBuf1, DBuf2)   \
> +	{ { 0, 0, DBuf1, DBuf2 }, BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_ACD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { DBuf1, 0, DBuf2, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_BCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { 0, DBuf1, DBuf2, DBuf3 }, BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { DBuf1, DBuf2, 0, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_D) }
> +#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> +	{ { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) }
> +#define ICL_PIPE_ABCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3, DBuf4)  \
> +	{ { DBuf1, DBuf2, DBuf3, DBuf4 }, BIT(PIPE_A) | BIT(PIPE_B) | \
> +					  BIT(PIPE_C) | BIT(PIPE_D) }
> +
> +/*
> + * Table taken from Bspec 12716
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +struct dbuf_slice_conf_entry icl_allowed_dbufs[] = {
> +{ { 0, 0, 0, 0 }, 0 },
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT),                /* for pipe ratio >= 88.8 */
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT),                /* for pipe ratio >= 88.8 */
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S2_BIT),                /* for pipe ratio >= 88.8 */
> +ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +};
> +
> +/*
> + * Table taken from Bspec 49255
> + * Pipes do have some preferred DBuf slice affinity,
> + * plus there are some hardcoded requirements on how
> + * those should be distributed for multipipe scenarios.
> + * For more DBuf slices algorithm can get even more messy
> + * and less readable, so decided to use a table almost
> + * as is from BSpec itself - that way it is at least easier
> + * to compare, change and check.
> + */
> +struct dbuf_slice_conf_entry tgl_allowed_dbufs[] = {
> +{ { 0, 0, 0, 0 }, 0 },
> +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_D_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> +ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_AD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_CD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ACD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +ICL_PIPE_BCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +ICL_PIPE_ABCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> +};
> +
> +/*
> + * This function finds an entry with same enabled pipe configuration and
> + * returns correspondent DBuf slice mask as stated in BSpec for particular
> + * platform.
> + */
> +static u32 icl_get_allowed_dbuf_mask(int pipe,
> +				     u32 active_pipes,
> +				     const struct intel_crtc_state *crtc_state)
> +{
> +	int i;
> +	uint_fixed_16_16_t pipe_ratio;
> +
> +	/*
> +	 * Calculate pipe ratio as stated in BSpec 28692
> +	 */
> +	pipe_ratio = icl_get_pipe_ratio(crtc_state);
> +
> +	for (i = 0; i < ARRAY_SIZE(icl_allowed_dbufs); i++) {
> +		if (icl_allowed_dbufs[i].active_pipes == active_pipes) {
> +			if (hweight32(active_pipes) == 1) {
> +				/*
> +				 * According to BSpec 12716: if ratio >= 88.8
> +				 * always use single dbuf slice.
> +				 */
> +				if (pipe_ratio.val >= div_fixed16(888, 10).val)
> +					++i;
> +			}
> +			return icl_allowed_dbufs[i].dbuf_mask[pipe];
> +		}
> +	}
> +	return 0;
> +}
> +
> +/*
> + * This function finds an entry with same enabled pipe configuration and
> + * returns correspondent DBuf slice mask as stated in BSpec for particular
> + * platform.
> + */
> +static u32 tgl_get_allowed_dbuf_mask(int pipe,
> +				     u32 active_pipes,
> +				     const struct intel_crtc_state *crtc_state)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(tgl_allowed_dbufs); i++) {
> +		if (tgl_allowed_dbufs[i].active_pipes == active_pipes)
> +			return tgl_allowed_dbufs[i].dbuf_mask[pipe];
> +	}
> +	return 0;
> +}
> +
> +u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> +				      int pipe, u32 active_pipes,
> +				      const struct intel_crtc_state *crtc_state)
> +{
> +	if (IS_GEN(dev_priv, 11))
> +		return icl_get_allowed_dbuf_mask(pipe,
> +						 active_pipes,
> +						 crtc_state);
> +	else if (IS_GEN(dev_priv, 12))
> +		return tgl_get_allowed_dbuf_mask(pipe,
> +						 active_pipes,
> +						 crtc_state);
> +	/*
> +	 * For anything else just return one slice yet.
> +	 * Should be extended for other platforms.
> +	 */
> +	return DBUF_S1_BIT;
> +}
> +
>  static u64
>  icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
>  				 u64 *plane_data_rate)
> -- 
> 2.17.1
>
Stanislav Lisovskiy Nov. 8, 2019, 1:56 p.m. UTC | #2
On Thu, 2019-11-07 at 16:52 -0800, Matt Roper wrote:
> On Thu, Nov 07, 2019 at 11:22:34PM +0200, Stanislav Lisovskiy wrote:
> > Also implemented algorithm for choosing DBuf slice configuration
> > based on active pipes, pipe ratio as stated in BSpec 12716.
> > 
> > Now pipe allocation still stays proportional to pipe width as
> > before,
> > however within allowed DBuf slice for this particular
> > configuration.
> > 
> > v2: Remove unneeded check from commit as ddb enabled slices might
> >     now differ from hw state.
> > 
> > v3: - Added new field "supported_slices" to ddb to separate max
> >       supported slices vs currently enabled, to avoid confusion.
> >     - Removed obsolete comments and code related to DBuf(Matthew
> > Roper).
> >     - Some code style and long line removal(Matthew Roper).
> >     - Added WARN_ON to new ddb range offset calc function(Matthew
> > Roper).
> >     - Removed platform specific call to calc pipe ratio from ddb
> >       allocation function and fixed the return value(Matthew Roper)
> >     - Refactored DBUF slice allocation table to improve readability
> >     - Added DBUF slice allocation for TGL as well.
> >     - ICL(however not TGL) seems to voluntarily enable second DBuf
> > slice
> >       after pm suspend/resume causing a mismatch failure, because
> > we
> >       update DBuf slices only if we do a modeset, however this
> > check
> >       is done always. Fixed it to be done only when modeset for
> > ICL.
> > 
> > v4: - Now enabled slices is not actually a number, but a bitmask,
> >       because we might need to enable second slice only and number
> >       of slices would still 1 and that current code doesn't allow.
> >     - Remove redundant duplicate code to have some unified way of
> >       enabling dbuf slices instead of hardcoding.
> > 
> > v5: - Fix failing gen9_assert_dbuf_enabled as it was naively
> > thinking
> >       that we have only one DBUF_CTL slice. Now another version for
> >       gen11 will check both slices as only second one can be
> > enabled,
> >       to keep CI happy.
> > 
> > Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> > Cc: Matthew Roper <matthew.d.roper@intel.com>
> > Cc: Ville Syrjälä <ville.syrjala@intel.com>
> > Cc: James Ausmus <james.ausmus@intel.com>
> > ---
> >  drivers/gpu/drm/i915/display/intel_display.c  |  22 +-
> >  .../drm/i915/display/intel_display_power.c    | 103 +++--
> >  .../drm/i915/display/intel_display_power.h    |   2 +
> >  drivers/gpu/drm/i915/i915_drv.c               |   5 +
> >  drivers/gpu/drm/i915/i915_drv.h               |   3 +-
> >  drivers/gpu/drm/i915/i915_reg.h               |   2 +
> >  drivers/gpu/drm/i915/intel_pm.c               | 391
> > ++++++++++++++++--
> >  7 files changed, 441 insertions(+), 87 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_display.c
> > b/drivers/gpu/drm/i915/display/intel_display.c
> > index 876fc25968bf..ee63da18772a 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display.c
> > +++ b/drivers/gpu/drm/i915/display/intel_display.c
> > @@ -13386,8 +13386,9 @@ static void verify_wm_state(struct
> > intel_crtc *crtc,
> >  	sw_ddb = &dev_priv->wm.skl_hw.ddb;
> >  
> >  	if (INTEL_GEN(dev_priv) >= 11 &&
> > -	    hw->ddb.enabled_slices != sw_ddb->enabled_slices)
> > -		DRM_ERROR("mismatch in DBUF Slices (expected %u, got
> > %u)\n",
> > +	    hw->ddb.enabled_slices != sw_ddb->enabled_slices &&
> > +	    needs_modeset(new_crtc_state))
> > +		DRM_ERROR("mismatch in DBUF Slices (expected %x, got
> > %x)\n",
> >  			  sw_ddb->enabled_slices,
> >  			  hw->ddb.enabled_slices);
> 
> Is this related to your changelog comment above?
> 
>     - ICL(however not TGL) seems to voluntarily enable second DBuf
> slice
>       after pm suspend/resume causing a mismatch failure, because we
>       update DBuf slices only if we do a modeset, however this check
>       is done always. Fixed it to be done only when modeset for ICL.

Hi,

Thank you for really new review comments and hints. I have addressed
most of the comments
as they were quite valid. The only thing which I would prefer to leave
is accounting the ret value when we are setting the DBuf slices -
despite the fact that if we fail most likely it leads us to a very bad
state anyway, I would still prefer that software state matches the
hardware as much as possible. 


Stan

> 
> It looks like you added an icl_dbuf_slices_restore() function that
> tries
> to sanitize the dbuf programming on resume; is that not sufficient to
> address this?
> 
> >  
> > @@ -14613,16 +14614,21 @@ static void
> > skl_commit_modeset_enables(struct intel_atomic_state *state)
> >  	int i;
> >  	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
> >  	u8 required_slices = state->wm_results.ddb.enabled_slices;
> > -	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
> > +	u8 slices_union = hw_enabled_slices | required_slices;
> > +	u8 slices_intersection = slices_union & required_slices;
> 
> Isn't this the same as "slices_intersection = required_slices?"
> 
> >  
> > +	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
> 
> Did you mean to move this line (and add a space before it but not
> after)?
> 
> >  	for_each_oldnew_intel_crtc_in_state(state, crtc,
> > old_crtc_state, new_crtc_state, i)
> >  		/* ignore allocations for crtc's that have been turned
> > off. */
> >  		if (new_crtc_state->hw.active)
> >  			entries[i] = old_crtc_state->wm.skl.ddb;
> >  
> > -	/* If 2nd DBuf slice required, enable it here */
> > -	if (INTEL_GEN(dev_priv) >= 11 && required_slices >
> > hw_enabled_slices)
> > -		icl_dbuf_slices_update(dev_priv, required_slices);
> > +	DRM_DEBUG_KMS("DBuf req slices %d hw slices %d\n",
> > +		      required_slices, hw_enabled_slices);
> > +
> > +	/* If some other DBuf slice required, enable it here */
> 
> Minor nitpick, but it might be more clear to write this as "Enable
> dbuf
> slices necessary to support both the old configuration and new
> configuration."  I.e., that helps clarify why we don't just jump
> straight to the final dbuf configuration here.
> 
> > +	if (INTEL_GEN(dev_priv) >= 11 && required_slices !=
> > hw_enabled_slices)
> > +		icl_dbuf_slices_update(dev_priv, slices_union);
> >  
> >  	/*
> >  	 * Whenever the number of active pipes changes, we need to make
> > sure we
> > @@ -14682,8 +14688,8 @@ static void
> > skl_commit_modeset_enables(struct intel_atomic_state *state)
> >  	} while (progress);
> >  
> >  	/* If 2nd DBuf slice is no more required disable it */
> 
> A corresponding change could be made to this comment (and removing
> the
> "2nd" terminology would future-proof the comment in case a future
> platform has more than just two slices).
> 
> > -	if (INTEL_GEN(dev_priv) >= 11 && required_slices <
> > hw_enabled_slices)
> > -		icl_dbuf_slices_update(dev_priv, required_slices);
> > +	if (INTEL_GEN(dev_priv) >= 11 && required_slices !=
> > hw_enabled_slices)
> > +		icl_dbuf_slices_update(dev_priv, slices_intersection);
> 
> As noted above, slices_intersection is equivalent to required_slices.
> 
> >  }
> >  
> >  static void intel_atomic_helper_free_state(struct drm_i915_private
> > *dev_priv)
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c
> > b/drivers/gpu/drm/i915/display/intel_display_power.c
> > index ce1b64f4dd44..c04ebb19bb4f 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_power.c
> > +++ b/drivers/gpu/drm/i915/display/intel_display_power.c
> > @@ -1040,6 +1040,21 @@ static void gen9_assert_dbuf_enabled(struct
> > drm_i915_private *dev_priv)
> >  	     "Unexpected DBuf power power state (0x%08x)\n", tmp);
> >  }
> >  
> > +static void gen11_assert_dbuf_enabled(struct drm_i915_private
> > *dev_priv)
> > +{
> > +	u32 tmp = I915_READ(DBUF_CTL_S1);
> > +	u32 tmp2 = I915_READ(DBUF_CTL_S2);
> > +	bool s1_enabled = (tmp & (DBUF_POWER_STATE |
> > DBUF_POWER_REQUEST)) ==
> > +			  (DBUF_POWER_STATE | DBUF_POWER_REQUEST);
> > +
> > +	bool s2_enabled = (tmp2 & (DBUF_POWER_STATE |
> > DBUF_POWER_REQUEST)) ==
> > +			  (DBUF_POWER_STATE | DBUF_POWER_REQUEST);
> > +
> > +	WARN(!s1_enabled && !s2_enabled,
> > +	     "Unexpected DBuf power power state (0x%08x, 0x%08x)\n",
> > tmp,
> > +								     tm
> > p2);
> > +}
> > +
> >  static void gen9_disable_dc_states(struct drm_i915_private
> > *dev_priv)
> >  {
> >  	struct intel_cdclk_state cdclk_state = {};
> > @@ -1055,7 +1070,10 @@ static void gen9_disable_dc_states(struct
> > drm_i915_private *dev_priv)
> >  	/* Can't read out voltage_level so can't use
> > intel_cdclk_changed() */
> >  	WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw,
> > &cdclk_state));
> >  
> > -	gen9_assert_dbuf_enabled(dev_priv);
> > +	if (INTEL_GEN(dev_priv) >= 11)
> > +		gen11_assert_dbuf_enabled(dev_priv);
> > +	else
> > +		gen9_assert_dbuf_enabled(dev_priv);
> >  
> >  	if (IS_GEN9_LP(dev_priv))
> >  		bxt_verify_ddi_phy_power_wells(dev_priv);
> > @@ -4254,31 +4272,53 @@ static void gen9_dbuf_disable(struct
> > drm_i915_private *dev_priv)
> >  	intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
> >  }
> >  
> > -static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
> > +u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
> 
> I realize your patch didn't introduce it, and it really doesn't
> matter,
> but when I see u8 or similar used as a return type I sort of expect
> the
> value to be a bitmask.  When it's just a count I don't see much
> reason
> to not just use a vanilla 'int,' even when the expected return values
> are likely to be small.  But it doesn't really matter, so I'll leave
> it
> up to you whether you want to change it.
> 
> Of course if we move this into intel_device_info (as I suggest
> farther
> down), we could just drop this function completely.
> 
> >  {
> >  	if (INTEL_GEN(dev_priv) < 11)
> >  		return 1;
> >  	return 2;
> >  }
> >  
> > +void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv)
> > +{
> > +	const u8 hw_enabled_slices = dev_priv-
> > >wm.skl_hw.ddb.enabled_slices;
> > +
> > +	icl_dbuf_slices_update(dev_priv, hw_enabled_slices);
> > +}
> > +
> >  void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
> >  			    u8 req_slices)
> >  {
> > -	const u8 hw_enabled_slices = dev_priv-
> > >wm.skl_hw.ddb.enabled_slices;
> > -	bool ret;
> > +	bool ret = true;
> > +	int i;
> > +	int max_slices = intel_dbuf_max_slices(dev_priv);
> >  
> > -	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
> > +	if (hweight8(req_slices) > intel_dbuf_max_slices(dev_priv)) {
> >  		DRM_ERROR("Invalid number of dbuf slices requested\n");
> >  		return;
> >  	}
> >  
> > -	if (req_slices == hw_enabled_slices || req_slices == 0)
> > -		return;
> > +	DRM_DEBUG_KMS("Updating dbuf slices to %x\n", req_slices);
> >  
> > -	if (req_slices > hw_enabled_slices)
> > -		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2,
> > true);
> > -	else
> > -		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2,
> > false);
> > +	for (i = 0; i < max_slices; i++) {
> > +		int slice_bit = 1 << i;
> 
> BIT(i) would be slightly cleaner.
> 
> > +		bool slice_set = ((slice_bit) & req_slices) != 0;
> 
> Unnecessary parens.
> 
> > +
> > +		switch (slice_bit) {
> > +		case DBUF_S1_BIT:
> > +			ret = ret && intel_dbuf_slice_set(dev_priv,
> > +							  DBUF_CTL_S1,
> > +							  slice_set);
> > +			break;
> > +		case DBUF_S2_BIT:
> > +			ret = ret && intel_dbuf_slice_set(dev_priv,
> > +							  DBUF_CTL_S2,
> > +							  slice_set);
> > +			break;
> > +		default:
> > +			WARN(1, "Unsupported DBUF slice");
> 
> Should probably use MISSING_CASE(slice_bit) here.
> 
> > +		}
> > +	}
> >  
> >  	if (ret)
> >  		dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
> 
> I'm not sure how valuable the success-tracking in this function
> is.  If
> we fail to power up/down the slice something went really wrong and
> we're
> probably going to be in an inconsistent state no matter what
> (possibly
> allocating DDB blocks to a slice that never powered up).  We already
> print warnings in intel_dbuf_slice_set() if we fail, so I'd be
> inclined
> to just set enabled slices unconditionally here and hope that it was
> just a matter of the hardware being extra slow to honor our request
> (so
> the slice would actually power up/down with a little more
> time).  We're
> already in the atomic commit phase at this point, so there isn't much
> we
> can really do as far as failing gracefully in cases where the slice
> didn't power up.
> 
> > @@ -4286,40 +4326,21 @@ void icl_dbuf_slices_update(struct
> > drm_i915_private *dev_priv,
> >  
> >  static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
> >  {
> > -	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) |
> > DBUF_POWER_REQUEST);
> > -	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) |
> > DBUF_POWER_REQUEST);
> > -	POSTING_READ(DBUF_CTL_S2);
> > -
> > -	udelay(10);
> > -
> > -	if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
> > -	    !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> > -		DRM_ERROR("DBuf power enable timeout\n");
> > -	else
> > -		/*
> > -		 * FIXME: for now pretend that we only have 1 slice,
> > see
> > -		 * intel_enabled_dbuf_slices_num().
> > -		 */
> > -		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> > +	/*
> > +	 * For now pretend that we only have 1 slice, we will
> > +	 * figure out later which slices we have and what we need.
> > +	 */
> 
> I'd write this comment more as "Just power up slice 1 for now; we can
> enable other slice(s) later as necessary."
> 
> > +	dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> 
> Since this is a bitmask, using DBUF_S1_BIT here would add clarity.
> 
> > +	icl_dbuf_slices_restore(dev_priv);
> >  }
> >  
> >  static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
> >  {
> > -	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) &
> > ~DBUF_POWER_REQUEST);
> > -	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) &
> > ~DBUF_POWER_REQUEST);
> > -	POSTING_READ(DBUF_CTL_S2);
> > -
> > -	udelay(10);
> > -
> > -	if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
> > -	    (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
> > -		DRM_ERROR("DBuf power disable timeout!\n");
> > -	else
> > -		/*
> > -		 * FIXME: for now pretend that the first slice is
> > always
> > -		 * enabled, see intel_enabled_dbuf_slices_num().
> > -		 */
> > -		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
> > +	/*
> > +	 * Disable all slices
> > +	 */
> > +	dev_priv->wm.skl_hw.ddb.enabled_slices = 0;
> > +	icl_dbuf_slices_restore(dev_priv);
> >  }
> >  
> >  static void icl_mbus_init(struct drm_i915_private *dev_priv)
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h
> > b/drivers/gpu/drm/i915/display/intel_display_power.h
> > index 1da04f3e0fb3..4d5bb0accef7 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_power.h
> > +++ b/drivers/gpu/drm/i915/display/intel_display_power.h
> > @@ -311,8 +311,10 @@ intel_display_power_put_async(struct
> > drm_i915_private *i915,
> >  	for ((wf) = intel_display_power_get((i915), (domain)); (wf); \
> >  	     intel_display_power_put_async((i915), (domain), (wf)),
> > (wf) = 0)
> >  
> > +u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv);
> >  void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
> >  			    u8 req_slices);
> > +void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv);
> >  
> >  void chv_phy_powergate_lanes(struct intel_encoder *encoder,
> >  			     bool override, unsigned int mask);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> > b/drivers/gpu/drm/i915/i915_drv.c
> > index 82e4e6bf08c3..0307aec2c928 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -51,6 +51,7 @@
> >  #include "display/intel_bw.h"
> >  #include "display/intel_cdclk.h"
> >  #include "display/intel_display_types.h"
> > +#include "display/intel_display_power.h"
> >  #include "display/intel_dp.h"
> >  #include "display/intel_fbdev.h"
> >  #include "display/intel_hotplug.h"
> > @@ -2588,6 +2589,10 @@ static int intel_runtime_resume(struct
> > device *kdev)
> >  	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> >  		ret = vlv_resume_prepare(dev_priv, true);
> >  
> > +	/* Weird hack to fix ICL hardware bug, as it resets DBUF slices
> > reg */
> > +	if (INTEL_GEN(dev_priv) == 11)
> > +		icl_dbuf_slices_restore(dev_priv);
> > +
> >  	intel_uncore_runtime_resume(&dev_priv->uncore);
> >  
> >  	intel_runtime_pm_enable_interrupts(dev_priv);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index 7e0f67babe20..78bd010cb59a 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -804,7 +804,8 @@ static inline bool skl_ddb_entry_equal(const
> > struct skl_ddb_entry *e1,
> >  }
> >  
> >  struct skl_ddb_allocation {
> > -	u8 enabled_slices; /* GEN11 has configurable 2 slices */
> > +	u8 enabled_slices;   /* Bitmask of currently enabled DBuf
> > slices */
> > +	u8 supported_slices; /* GEN 11 supports 2 slices */
> 
> This might fit better in intel_device_info, possibly next to the
> definition of ddb_size?
> 
> 
> >  };
> >  
> >  struct skl_ddb_values {
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index a607ea520829..7eca2adbb281 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -7734,6 +7734,8 @@ enum {
> >  #define DISP_ARB_CTL2	_MMIO(0x45004)
> >  #define  DISP_DATA_PARTITION_5_6	(1 << 6)
> >  #define  DISP_IPC_ENABLE		(1 << 3)
> > +#define DBUF_S1_BIT			1
> > +#define DBUF_S2_BIT			2
> 
> These should probably be BIT(0) and BIT(1) for
> clarity.  Alternatively,
> you could parameterize the macro as something like
> 
>   #define DBUF_SLICE(i)   BIT(i-1)
> 
> >  #define DBUF_CTL	_MMIO(0x45008)
> >  #define DBUF_CTL_S1	_MMIO(0x45008)
> >  #define DBUF_CTL_S2	_MMIO(0x44FE8)
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > b/drivers/gpu/drm/i915/intel_pm.c
> > index 2d389e437e87..ebe537a47879 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -3586,24 +3586,20 @@ bool ilk_disable_lp_wm(struct drm_device
> > *dev)
> >  	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
> >  }
> >  
> > -static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private
> > *dev_priv)
> > +static u8 intel_enabled_dbuf_slices(struct drm_i915_private
> > *dev_priv)
> >  {
> > -	u8 enabled_slices;
> > -
> > -	/* Slice 1 will always be enabled */
> > -	enabled_slices = 1;
> > +	u8 enabled_slices = 0;
> >  
> >  	/* Gen prior to GEN11 have only one DBuf slice */
> >  	if (INTEL_GEN(dev_priv) < 11)
> > -		return enabled_slices;
> > +		return 1;
> >  
> > -	/*
> > -	 * FIXME: for now we'll only ever use 1 slice; pretend that we
> > have
> > -	 * only that 1 slice enabled until we have a proper way for on-
> > demand
> > -	 * toggling of the second slice.
> > -	 */
> > -	if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> > -		enabled_slices++;
> > +	/* Check if second DBuf slice is enabled */
> > +	if (I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE)
> > +		enabled_slices |= 1;
> > +
> > +	if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
> > +		enabled_slices |= 2;
> 
> These should be "enabled_slices |= DBUF_S#_BIT" for clarity.
> 
> >  
> >  	return enabled_slices;
> >  }
> > @@ -3812,36 +3808,44 @@ static u16 intel_get_ddb_size(struct
> > drm_i915_private *dev_priv,
> >  			      const int num_active,
> >  			      struct skl_ddb_allocation *ddb)
> >  {
> > -	const struct drm_display_mode *adjusted_mode;
> > -	u64 total_data_bw;
> >  	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
> >  
> >  	WARN_ON(ddb_size == 0);
> > +	ddb->supported_slices = intel_dbuf_max_slices(dev_priv);
> >  
> >  	if (INTEL_GEN(dev_priv) < 11)
> >  		return ddb_size - 4; /* 4 blocks for bypass path
> > allocation */
> >  
> > -	adjusted_mode = &crtc_state->hw.adjusted_mode;
> > -	total_data_bw = total_data_rate *
> > drm_mode_vrefresh(adjusted_mode);
> > +	return ddb_size;
> > +}
> >  
> > -	/*
> > -	 * 12GB/s is maximum BW supported by single DBuf slice.
> > -	 *
> > -	 * FIXME dbuf slice code is broken:
> > -	 * - must wait for planes to stop using the slice before
> > powering it off
> > -	 * - plane straddling both slices is illegal in multi-pipe
> > scenarios
> > -	 * - should validate we stay within the hw bandwidth limits
> > -	 */
> > -	if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
> > -		ddb->enabled_slices = 2;
> > -	} else {
> > -		ddb->enabled_slices = 1;
> > -		ddb_size /= 2;
> > -	}
> > +/*
> > + * Calculate initial DBuf slice offset, based on slice size
> > + * and mask(i.e if slice size is 1024 and second slice is enabled
> > + * offset would be 1024)
> > + */
> > +static u32 skl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
> > +					   u32 slice_size, u32
> > ddb_size)
> > +{
> > +	u32 offset = 0;
> >  
> > -	return ddb_size;
> > +	if (!dbuf_slice_mask)
> > +		return 0;
> > +
> > +	while (!(dbuf_slice_mask & 1)) {
> > +		dbuf_slice_mask >>= 1;
> > +		offset += slice_size;
> > +		if (offset >= ddb_size)
> > +			break;
> > +	}
> > +	WARN_ON(offset >= ddb_size);
> > +	return offset;
> 
> I think we could simplify this function with ffs():
> 
>     static u32 skl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
>                                                u32 slice_size, u32
> ddb_size)
>     {
>             u32 offset = ffs(dbuf_slice_mask) * slice_size;
> 
>             WARN_ON(offset >= ddb_size);
>             return offset;
>     }
> 
> or just inline it into the callsite now that it's short enough.
> 
> >  }
> >  
> > +static u32 i915_get_allowed_dbuf_mask(struct drm_i915_private
> > *dev_priv,
> > +				      int pipe, u32 active_pipes,
> > +				      const struct intel_crtc_state
> > *crtc_state);
> > +
> >  static void
> >  skl_ddb_get_pipe_allocation_limits(struct drm_i915_private
> > *dev_priv,
> >  				   const struct intel_crtc_state
> > *crtc_state,
> > @@ -3857,7 +3861,14 @@ skl_ddb_get_pipe_allocation_limits(struct
> > drm_i915_private *dev_priv,
> >  	u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
> >  	enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
> >  	u16 ddb_size;
> > +	u32 ddb_range_size;
> >  	u32 i;
> > +	u32 dbuf_slice_mask;
> > +	u32 active_pipes;
> > +	u32 offset;
> > +	u32 slice_size;
> > +	u32 total_slice_mask;
> > +	u32 start, end;
> >  
> >  	if (WARN_ON(!state) || !crtc_state->hw.active) {
> >  		alloc->start = 0;
> > @@ -3866,14 +3877,23 @@ skl_ddb_get_pipe_allocation_limits(struct
> > drm_i915_private *dev_priv,
> >  		return;
> >  	}
> >  
> > -	if (intel_state->active_pipe_changes)
> > +	if (intel_state->active_pipe_changes) {
> >  		*num_active = hweight8(intel_state->active_pipes);
> > -	else
> > +		active_pipes = intel_state->active_pipes;
> > +	} else {
> >  		*num_active = hweight8(dev_priv->active_pipes);
> > +		active_pipes = dev_priv->active_pipes;
> > +	}
> 
> At the moment this is still safe (any changes to active_pipes
> requires
> grabbing the locks on *all* CRTCs on gen9+), but once we land your
> dbuf support, we should come back and make the locking more fine-
> grained
> since at that point I don't think there should be a need for a
> modeset
> that affects one slice to block modesets against CRTCs using other
> slices.
> 
> >  
> >  	ddb_size = intel_get_ddb_size(dev_priv, crtc_state,
> > total_data_rate,
> >  				      *num_active, ddb);
> >  
> > +	DRM_DEBUG_KMS("Got total ddb size %d\n", ddb_size);
> > +
> > +	slice_size = ddb_size / ddb->supported_slices;
> > +
> > +	DRM_DEBUG_KMS("Got DBuf slice size %d\n", slice_size);
> > +
> >  	/*
> >  	 * If the state doesn't change the active CRTC's or there is no
> >  	 * modeset request, then there's no need to recalculate;
> > @@ -3891,20 +3911,73 @@ skl_ddb_get_pipe_allocation_limits(struct
> > drm_i915_private *dev_priv,
> >  		return;
> >  	}
> >  
> > +	/*
> > +	 * Get allowed DBuf slices for correspondent pipe and platform.
> > +	 */
> > +	dbuf_slice_mask = i915_get_allowed_dbuf_mask(dev_priv,
> > for_pipe,
> > +						     active_pipes,
> > crtc_state);
> > +
> > +	DRM_DEBUG_KMS("DBuf slice mask %x pipe %d active pipes %x\n",
> > +		      dbuf_slice_mask,
> > +		      for_pipe, active_pipes);
> > +
> > +	/*
> > +	 * Figure out at which DBuf slice we start, i.e if we start at
> > Dbuf S2
> > +	 * and slice size is 1024, the offset would be 1024
> > +	 */
> > +	offset = skl_get_first_dbuf_slice_offset(dbuf_slice_mask,
> > +						 slice_size, ddb_size);
> > +
> > +	/*
> > +	 * Figure out total size of allowed DBuf slices, which is
> > basically
> > +	 * a number of allowed slices for that pipe multiplied by slice
> > size.
> > +	 * We might still have some dbuf slices disabled in case if
> > those
> > +	 * are not needed based on bandwidth requirements and
> > num_active pipes,
> > +	 * so stick to real ddb size if it happens to be less. Inside
> > of this
> > +	 * range ddb entries are still allocated in proportion to
> > display width.
> > +	 */
> > +	ddb_range_size = min(hweight8(dbuf_slice_mask) * slice_size,
> > +			     (unsigned int)ddb_size);
> 
> Isn't ddb_size the total size of the ddb assuming all slices are
> enabled?  I don't see how that could be less than mask * slice_size?
> 
> 
> Matt
> 
> > +
> >  	/*
> >  	 * Watermark/ddb requirement highly depends upon width of the
> >  	 * framebuffer, So instead of allocating DDB equally among
> > pipes
> >  	 * distribute DDB based on resolution/width of the display.
> >  	 */
> > +	total_slice_mask = dbuf_slice_mask;
> >  	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state,
> > i) {
> >  		const struct drm_display_mode *adjusted_mode =
> >  			&crtc_state->hw.adjusted_mode;
> >  		enum pipe pipe = crtc->pipe;
> >  		int hdisplay, vdisplay;
> > +		u32 pipe_dbuf_slice_mask = \
> > +			i915_get_allowed_dbuf_mask(dev_priv,
> > +						pipe,
> > +						active_pipes,
> > +						crtc_state);
> >  
> >  		if (!crtc_state->hw.enable)
> >  			continue;
> >  
> > +		/*
> > +		 * According to BSpec pipe can share one dbuf slice
> > with another
> > +		 * pipes or pipe can use multiple dbufs, in both cases
> > we
> > +		 * account for other pipes only if they have exactly
> > same mask.
> > +		 * However we need to account how many slices we should
> > enable
> > +		 * in total.
> > +		 */
> > +		total_slice_mask |= pipe_dbuf_slice_mask;
> > +
> > +		/*
> > +		 * Do not account pipes using other slice sets
> > +		 * luckily as of current BSpec slice sets do not
> > partially
> > +		 * intersect(pipes share either same one slice or same
> > slice set
> > +		 * i.e no partial intersection), so it is enough to
> > check for
> > +		 * equality for now.
> > +		 */
> > +		if (dbuf_slice_mask != pipe_dbuf_slice_mask)
> > +			continue;
> > +
> >  		drm_mode_get_hv_timing(adjusted_mode, &hdisplay,
> > &vdisplay);
> >  		total_width += hdisplay;
> >  
> > @@ -3914,8 +3987,19 @@ skl_ddb_get_pipe_allocation_limits(struct
> > drm_i915_private *dev_priv,
> >  			pipe_width = hdisplay;
> >  	}
> >  
> > -	alloc->start = ddb_size * width_before_pipe / total_width;
> > -	alloc->end = ddb_size * (width_before_pipe + pipe_width) /
> > total_width;
> > +	ddb->enabled_slices = total_slice_mask;
> > +
> > +	start = ddb_range_size * width_before_pipe / total_width;
> > +	end = ddb_range_size * (width_before_pipe + pipe_width) /
> > total_width;
> > +
> > +	alloc->start = offset + start;
> > +	alloc->end = offset + end;
> > +
> > +	DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe,
> > +		      alloc->start, alloc->end);
> > +	DRM_DEBUG_KMS("Enabled ddb slices mask %x num supported %d\n",
> > +		      ddb->enabled_slices,
> > +		      ddb->supported_slices);
> >  }
> >  
> >  static int skl_compute_wm_params(const struct intel_crtc_state
> > *crtc_state,
> > @@ -4036,7 +4120,8 @@ void skl_pipe_ddb_get_hw_state(struct
> > intel_crtc *crtc,
> >  void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
> >  			  struct skl_ddb_allocation *ddb /* out */)
> >  {
> > -	ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
> > +	ddb->enabled_slices = intel_enabled_dbuf_slices(dev_priv);
> > +	DRM_DEBUG_KMS("Got hw dbuf slices mask %x\n", ddb-
> > >enabled_slices);
> >  }
> >  
> >  /*
> > @@ -4164,6 +4249,238 @@ skl_get_total_relative_data_rate(struct
> > intel_crtc_state *crtc_state,
> >  	return total_data_rate;
> >  }
> >  
> > +uint_fixed_16_16_t
> > +skl_pipe_downscale_amount(const struct intel_crtc_state
> > *crtc_state)
> > +{
> > +	u32 src_w, src_h, dst_w, dst_h;
> > +	uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
> > +	uint_fixed_16_16_t downscale_h, downscale_w;
> > +	const struct drm_display_mode *adjusted_mode = &crtc_state-
> > >hw.adjusted_mode;
> > +
> > +	src_w = crtc_state->pipe_src_w;
> > +	src_h = crtc_state->pipe_src_h;
> > +	dst_w = adjusted_mode->crtc_hdisplay;
> > +	dst_h = adjusted_mode->crtc_vdisplay;
> > +
> > +	fp_w_ratio = div_fixed16(src_w, dst_w);
> > +	fp_h_ratio = div_fixed16(src_h, dst_h);
> > +	downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
> > +	downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
> > +
> > +	return mul_fixed16(downscale_w, downscale_h);
> > +}
> > +
> > +uint_fixed_16_16_t
> > +icl_get_pipe_ratio(const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct drm_plane *plane;
> > +	const struct drm_plane_state *drm_plane_state;
> > +	uint_fixed_16_16_t pipe_downscale, pipe_ratio;
> > +	uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> > +
> > +	if (!crtc_state->uapi.enable)
> > +		return max_downscale;
> > +
> > +	drm_atomic_crtc_state_for_each_plane_state(plane,
> > drm_plane_state, &crtc_state->uapi) {
> > +		uint_fixed_16_16_t plane_downscale;
> > +		const struct intel_plane_state *plane_state =
> > +			to_intel_plane_state(drm_plane_state);
> > +
> > +		if (!intel_wm_plane_visible(crtc_state, plane_state))
> > +			continue;
> > +
> > +		plane_downscale =
> > skl_plane_downscale_amount(crtc_state, plane_state);
> > +
> > +		DRM_DEBUG_KMS("Plane %d downscale amount %d.%d\n",
> > +			      to_intel_plane(plane)->id,
> > +			      plane_downscale.val >> 16,
> > +			      plane_downscale.val & 0xffff);
> > +
> > +		max_downscale = max_fixed16(plane_downscale,
> > max_downscale);
> > +	}
> > +
> > +
> > +	pipe_downscale = skl_pipe_downscale_amount(crtc_state);
> > +
> > +	DRM_DEBUG_KMS("Pipe %d downscale amount %d.%d\n",
> > +		       crtc->pipe, pipe_downscale.val >> 16,
> > +		       pipe_downscale.val & 0xffff);
> > +
> > +	pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
> > +
> > +	/* Convert result to percentage */
> > +	pipe_ratio = u32_to_fixed16(100 * div_round_up_u32_fixed16(1,
> > pipe_downscale));
> > +
> > +	DRM_DEBUG_KMS("Pipe %d ratio %d.%d\n", crtc->pipe,
> > +		      pipe_ratio.val >> 16, pipe_ratio.val & 0xffff);
> > +
> > +	return pipe_ratio;
> > +}
> > +
> > +struct dbuf_slice_conf_entry {
> > +	u32 dbuf_mask[I915_MAX_PIPES];
> > +	u32 active_pipes;
> > +};
> > +
> > +
> > +#define ICL_PIPE_A_DBUF_SLICES(DBuf1)  \
> > +	{ { DBuf1, 0, 0, 0 }, BIT(PIPE_A) }
> > +#define ICL_PIPE_B_DBUF_SLICES(DBuf1)  \
> > +	{ { 0, DBuf1, 0, 0 }, BIT(PIPE_B) }
> > +#define ICL_PIPE_C_DBUF_SLICES(DBuf1)  \
> > +	{ { 0, 0, DBuf1, 0 }, BIT(PIPE_C) }
> > +#define ICL_PIPE_D_DBUF_SLICES(DBuf1)  \
> > +	{ { 0, 0, 0, DBuf1 }, BIT(PIPE_D) }
> > +#define ICL_PIPE_AB_DBUF_SLICES(DBuf1, DBuf2)   \
> > +	{ { DBuf1, DBuf2, 0, 0 }, BIT(PIPE_A) | BIT(PIPE_B) }
> > +#define ICL_PIPE_BC_DBUF_SLICES(DBuf1, DBuf2)   \
> > +	{ { 0, DBuf1, DBuf2, 0 }, BIT(PIPE_B) | BIT(PIPE_C) }
> > +#define ICL_PIPE_BD_DBUF_SLICES(DBuf1, DBuf2)   \
> > +	{ { 0, DBuf1, 0, DBuf2 }, BIT(PIPE_B) | BIT(PIPE_D) }
> > +#define ICL_PIPE_AC_DBUF_SLICES(DBuf1, DBuf2)   \
> > +	{ { DBuf1, 0, DBuf2, 0 }, BIT(PIPE_A) | BIT(PIPE_C) }
> > +#define ICL_PIPE_AD_DBUF_SLICES(DBuf1, DBuf2)   \
> > +	{ { DBuf1, 0, 0, DBuf2 }, BIT(PIPE_A) | BIT(PIPE_D) }
> > +#define ICL_PIPE_CD_DBUF_SLICES(DBuf1, DBuf2)   \
> > +	{ { 0, 0, DBuf1, DBuf2 }, BIT(PIPE_C) | BIT(PIPE_D) }
> > +#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> > +	{ { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) |
> > BIT(PIPE_C) }
> > +#define ICL_PIPE_ACD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> > +	{ { DBuf1, 0, DBuf2, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_C) |
> > BIT(PIPE_D) }
> > +#define ICL_PIPE_BCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> > +	{ { 0, DBuf1, DBuf2, DBuf3 }, BIT(PIPE_B) | BIT(PIPE_C) |
> > BIT(PIPE_D) }
> > +#define ICL_PIPE_ABD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> > +	{ { DBuf1, DBuf2, 0, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_B) |
> > BIT(PIPE_D) }
> > +#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
> > +	{ { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) |
> > BIT(PIPE_C) }
> > +#define ICL_PIPE_ABCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3, DBuf4)  \
> > +	{ { DBuf1, DBuf2, DBuf3, DBuf4 }, BIT(PIPE_A) | BIT(PIPE_B) | \
> > +					  BIT(PIPE_C) | BIT(PIPE_D) }
> > +
> > +/*
> > + * Table taken from Bspec 12716
> > + * Pipes do have some preferred DBuf slice affinity,
> > + * plus there are some hardcoded requirements on how
> > + * those should be distributed for multipipe scenarios.
> > + * For more DBuf slices algorithm can get even more messy
> > + * and less readable, so decided to use a table almost
> > + * as is from BSpec itself - that way it is at least easier
> > + * to compare, change and check.
> > + */
> > +struct dbuf_slice_conf_entry icl_allowed_dbufs[] = {
> > +{ { 0, 0, 0, 0 }, 0 },
> > +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio
> > < 88.8 */
> > +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT),                /* for pipe
> > ratio >= 88.8 */
> > +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio
> > < 88.8 */
> > +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT),                /* for pipe
> > ratio >= 88.8 */
> > +ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio
> > < 88.8 */
> > +ICL_PIPE_C_DBUF_SLICES(DBUF_S2_BIT),                /* for pipe
> > ratio >= 88.8 */
> > +ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> > +};
> > +
> > +/*
> > + * Table taken from Bspec 49255
> > + * Pipes do have some preferred DBuf slice affinity,
> > + * plus there are some hardcoded requirements on how
> > + * those should be distributed for multipipe scenarios.
> > + * For more DBuf slices algorithm can get even more messy
> > + * and less readable, so decided to use a table almost
> > + * as is from BSpec itself - that way it is at least easier
> > + * to compare, change and check.
> > + */
> > +struct dbuf_slice_conf_entry tgl_allowed_dbufs[] = {
> > +{ { 0, 0, 0, 0 }, 0 },
> > +ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> > +ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> > +ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> > +ICL_PIPE_D_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
> > +ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_AD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_BD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_CD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_ABD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_ACD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_BCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
> > +ICL_PIPE_ABCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT,
> > DBUF_S2_BIT),
> > +};
> > +
> > +/*
> > + * This function finds an entry with same enabled pipe
> > configuration and
> > + * returns correspondent DBuf slice mask as stated in BSpec for
> > particular
> > + * platform.
> > + */
> > +static u32 icl_get_allowed_dbuf_mask(int pipe,
> > +				     u32 active_pipes,
> > +				     const struct intel_crtc_state
> > *crtc_state)
> > +{
> > +	int i;
> > +	uint_fixed_16_16_t pipe_ratio;
> > +
> > +	/*
> > +	 * Calculate pipe ratio as stated in BSpec 28692
> > +	 */
> > +	pipe_ratio = icl_get_pipe_ratio(crtc_state);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(icl_allowed_dbufs); i++) {
> > +		if (icl_allowed_dbufs[i].active_pipes == active_pipes)
> > {
> > +			if (hweight32(active_pipes) == 1) {
> > +				/*
> > +				 * According to BSpec 12716: if ratio
> > >= 88.8
> > +				 * always use single dbuf slice.
> > +				 */
> > +				if (pipe_ratio.val >= div_fixed16(888,
> > 10).val)
> > +					++i;
> > +			}
> > +			return icl_allowed_dbufs[i].dbuf_mask[pipe];
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +
> > +/*
> > + * This function finds an entry with same enabled pipe
> > configuration and
> > + * returns correspondent DBuf slice mask as stated in BSpec for
> > particular
> > + * platform.
> > + */
> > +static u32 tgl_get_allowed_dbuf_mask(int pipe,
> > +				     u32 active_pipes,
> > +				     const struct intel_crtc_state
> > *crtc_state)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(tgl_allowed_dbufs); i++) {
> > +		if (tgl_allowed_dbufs[i].active_pipes == active_pipes)
> > +			return tgl_allowed_dbufs[i].dbuf_mask[pipe];
> > +	}
> > +	return 0;
> > +}
> > +
> > +u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
> > +				      int pipe, u32 active_pipes,
> > +				      const struct intel_crtc_state
> > *crtc_state)
> > +{
> > +	if (IS_GEN(dev_priv, 11))
> > +		return icl_get_allowed_dbuf_mask(pipe,
> > +						 active_pipes,
> > +						 crtc_state);
> > +	else if (IS_GEN(dev_priv, 12))
> > +		return tgl_get_allowed_dbuf_mask(pipe,
> > +						 active_pipes,
> > +						 crtc_state);
> > +	/*
> > +	 * For anything else just return one slice yet.
> > +	 * Should be extended for other platforms.
> > +	 */
> > +	return DBUF_S1_BIT;
> > +}
> > +
> >  static u64
> >  icl_get_total_relative_data_rate(struct intel_crtc_state
> > *crtc_state,
> >  				 u64 *plane_data_rate)
> > -- 
> > 2.17.1
> > 
> 
>

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 876fc25968bf..ee63da18772a 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -13386,8 +13386,9 @@  static void verify_wm_state(struct intel_crtc *crtc,
 	sw_ddb = &dev_priv->wm.skl_hw.ddb;
 
 	if (INTEL_GEN(dev_priv) >= 11 &&
-	    hw->ddb.enabled_slices != sw_ddb->enabled_slices)
-		DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
+	    hw->ddb.enabled_slices != sw_ddb->enabled_slices &&
+	    needs_modeset(new_crtc_state))
+		DRM_ERROR("mismatch in DBUF Slices (expected %x, got %x)\n",
 			  sw_ddb->enabled_slices,
 			  hw->ddb.enabled_slices);
 
@@ -14613,16 +14614,21 @@  static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	int i;
 	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
 	u8 required_slices = state->wm_results.ddb.enabled_slices;
-	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
+	u8 slices_union = hw_enabled_slices | required_slices;
+	u8 slices_intersection = slices_union & required_slices;
 
+	struct skl_ddb_entry entries[I915_MAX_PIPES] = {};
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i)
 		/* ignore allocations for crtc's that have been turned off. */
 		if (new_crtc_state->hw.active)
 			entries[i] = old_crtc_state->wm.skl.ddb;
 
-	/* If 2nd DBuf slice required, enable it here */
-	if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices)
-		icl_dbuf_slices_update(dev_priv, required_slices);
+	DRM_DEBUG_KMS("DBuf req slices %d hw slices %d\n",
+		      required_slices, hw_enabled_slices);
+
+	/* If some other DBuf slice required, enable it here */
+	if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices)
+		icl_dbuf_slices_update(dev_priv, slices_union);
 
 	/*
 	 * Whenever the number of active pipes changes, we need to make sure we
@@ -14682,8 +14688,8 @@  static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 	} while (progress);
 
 	/* If 2nd DBuf slice is no more required disable it */
-	if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices)
-		icl_dbuf_slices_update(dev_priv, required_slices);
+	if (INTEL_GEN(dev_priv) >= 11 && required_slices != hw_enabled_slices)
+		icl_dbuf_slices_update(dev_priv, slices_intersection);
 }
 
 static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index ce1b64f4dd44..c04ebb19bb4f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -1040,6 +1040,21 @@  static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
 	     "Unexpected DBuf power power state (0x%08x)\n", tmp);
 }
 
+static void gen11_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
+{
+	u32 tmp = I915_READ(DBUF_CTL_S1);
+	u32 tmp2 = I915_READ(DBUF_CTL_S2);
+	bool s1_enabled = (tmp & (DBUF_POWER_STATE | DBUF_POWER_REQUEST)) ==
+			  (DBUF_POWER_STATE | DBUF_POWER_REQUEST);
+
+	bool s2_enabled = (tmp2 & (DBUF_POWER_STATE | DBUF_POWER_REQUEST)) ==
+			  (DBUF_POWER_STATE | DBUF_POWER_REQUEST);
+
+	WARN(!s1_enabled && !s2_enabled,
+	     "Unexpected DBuf power power state (0x%08x, 0x%08x)\n", tmp,
+								     tmp2);
+}
+
 static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
 {
 	struct intel_cdclk_state cdclk_state = {};
@@ -1055,7 +1070,10 @@  static void gen9_disable_dc_states(struct drm_i915_private *dev_priv)
 	/* Can't read out voltage_level so can't use intel_cdclk_changed() */
 	WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw, &cdclk_state));
 
-	gen9_assert_dbuf_enabled(dev_priv);
+	if (INTEL_GEN(dev_priv) >= 11)
+		gen11_assert_dbuf_enabled(dev_priv);
+	else
+		gen9_assert_dbuf_enabled(dev_priv);
 
 	if (IS_GEN9_LP(dev_priv))
 		bxt_verify_ddi_phy_power_wells(dev_priv);
@@ -4254,31 +4272,53 @@  static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
 	intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
 }
 
-static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
+u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
 {
 	if (INTEL_GEN(dev_priv) < 11)
 		return 1;
 	return 2;
 }
 
+void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv)
+{
+	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
+
+	icl_dbuf_slices_update(dev_priv, hw_enabled_slices);
+}
+
 void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices)
 {
-	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
-	bool ret;
+	bool ret = true;
+	int i;
+	int max_slices = intel_dbuf_max_slices(dev_priv);
 
-	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
+	if (hweight8(req_slices) > intel_dbuf_max_slices(dev_priv)) {
 		DRM_ERROR("Invalid number of dbuf slices requested\n");
 		return;
 	}
 
-	if (req_slices == hw_enabled_slices || req_slices == 0)
-		return;
+	DRM_DEBUG_KMS("Updating dbuf slices to %x\n", req_slices);
 
-	if (req_slices > hw_enabled_slices)
-		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
-	else
-		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false);
+	for (i = 0; i < max_slices; i++) {
+		int slice_bit = 1 << i;
+		bool slice_set = ((slice_bit) & req_slices) != 0;
+
+		switch (slice_bit) {
+		case DBUF_S1_BIT:
+			ret = ret && intel_dbuf_slice_set(dev_priv,
+							  DBUF_CTL_S1,
+							  slice_set);
+			break;
+		case DBUF_S2_BIT:
+			ret = ret && intel_dbuf_slice_set(dev_priv,
+							  DBUF_CTL_S2,
+							  slice_set);
+			break;
+		default:
+			WARN(1, "Unsupported DBUF slice");
+		}
+	}
 
 	if (ret)
 		dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
@@ -4286,40 +4326,21 @@  void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 
 static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
 {
-	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
-	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) | DBUF_POWER_REQUEST);
-	POSTING_READ(DBUF_CTL_S2);
-
-	udelay(10);
-
-	if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
-	    !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
-		DRM_ERROR("DBuf power enable timeout\n");
-	else
-		/*
-		 * FIXME: for now pretend that we only have 1 slice, see
-		 * intel_enabled_dbuf_slices_num().
-		 */
-		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
+	/*
+	 * For now pretend that we only have 1 slice, we will
+	 * figure out later which slices we have and what we need.
+	 */
+	dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
+	icl_dbuf_slices_restore(dev_priv);
 }
 
 static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
 {
-	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) & ~DBUF_POWER_REQUEST);
-	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) & ~DBUF_POWER_REQUEST);
-	POSTING_READ(DBUF_CTL_S2);
-
-	udelay(10);
-
-	if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
-	    (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
-		DRM_ERROR("DBuf power disable timeout!\n");
-	else
-		/*
-		 * FIXME: for now pretend that the first slice is always
-		 * enabled, see intel_enabled_dbuf_slices_num().
-		 */
-		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
+	/*
+	 * Disable all slices
+	 */
+	dev_priv->wm.skl_hw.ddb.enabled_slices = 0;
+	icl_dbuf_slices_restore(dev_priv);
 }
 
 static void icl_mbus_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
index 1da04f3e0fb3..4d5bb0accef7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -311,8 +311,10 @@  intel_display_power_put_async(struct drm_i915_private *i915,
 	for ((wf) = intel_display_power_get((i915), (domain)); (wf); \
 	     intel_display_power_put_async((i915), (domain), (wf)), (wf) = 0)
 
+u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv);
 void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices);
+void icl_dbuf_slices_restore(struct drm_i915_private *dev_priv);
 
 void chv_phy_powergate_lanes(struct intel_encoder *encoder,
 			     bool override, unsigned int mask);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 82e4e6bf08c3..0307aec2c928 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -51,6 +51,7 @@ 
 #include "display/intel_bw.h"
 #include "display/intel_cdclk.h"
 #include "display/intel_display_types.h"
+#include "display/intel_display_power.h"
 #include "display/intel_dp.h"
 #include "display/intel_fbdev.h"
 #include "display/intel_hotplug.h"
@@ -2588,6 +2589,10 @@  static int intel_runtime_resume(struct device *kdev)
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		ret = vlv_resume_prepare(dev_priv, true);
 
+	/* Weird hack to fix ICL hardware bug, as it resets DBUF slices reg */
+	if (INTEL_GEN(dev_priv) == 11)
+		icl_dbuf_slices_restore(dev_priv);
+
 	intel_uncore_runtime_resume(&dev_priv->uncore);
 
 	intel_runtime_pm_enable_interrupts(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7e0f67babe20..78bd010cb59a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -804,7 +804,8 @@  static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
 }
 
 struct skl_ddb_allocation {
-	u8 enabled_slices; /* GEN11 has configurable 2 slices */
+	u8 enabled_slices;   /* Bitmask of currently enabled DBuf slices */
+	u8 supported_slices; /* GEN 11 supports 2 slices */
 };
 
 struct skl_ddb_values {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a607ea520829..7eca2adbb281 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7734,6 +7734,8 @@  enum {
 #define DISP_ARB_CTL2	_MMIO(0x45004)
 #define  DISP_DATA_PARTITION_5_6	(1 << 6)
 #define  DISP_IPC_ENABLE		(1 << 3)
+#define DBUF_S1_BIT			1
+#define DBUF_S2_BIT			2
 #define DBUF_CTL	_MMIO(0x45008)
 #define DBUF_CTL_S1	_MMIO(0x45008)
 #define DBUF_CTL_S2	_MMIO(0x44FE8)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2d389e437e87..ebe537a47879 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3586,24 +3586,20 @@  bool ilk_disable_lp_wm(struct drm_device *dev)
 	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
 }
 
-static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
+static u8 intel_enabled_dbuf_slices(struct drm_i915_private *dev_priv)
 {
-	u8 enabled_slices;
-
-	/* Slice 1 will always be enabled */
-	enabled_slices = 1;
+	u8 enabled_slices = 0;
 
 	/* Gen prior to GEN11 have only one DBuf slice */
 	if (INTEL_GEN(dev_priv) < 11)
-		return enabled_slices;
+		return 1;
 
-	/*
-	 * FIXME: for now we'll only ever use 1 slice; pretend that we have
-	 * only that 1 slice enabled until we have a proper way for on-demand
-	 * toggling of the second slice.
-	 */
-	if (0 && I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
-		enabled_slices++;
+	/* Check if second DBuf slice is enabled */
+	if (I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE)
+		enabled_slices |= 1;
+
+	if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
+		enabled_slices |= 2;
 
 	return enabled_slices;
 }
@@ -3812,36 +3808,44 @@  static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
 			      const int num_active,
 			      struct skl_ddb_allocation *ddb)
 {
-	const struct drm_display_mode *adjusted_mode;
-	u64 total_data_bw;
 	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
 
 	WARN_ON(ddb_size == 0);
+	ddb->supported_slices = intel_dbuf_max_slices(dev_priv);
 
 	if (INTEL_GEN(dev_priv) < 11)
 		return ddb_size - 4; /* 4 blocks for bypass path allocation */
 
-	adjusted_mode = &crtc_state->hw.adjusted_mode;
-	total_data_bw = total_data_rate * drm_mode_vrefresh(adjusted_mode);
+	return ddb_size;
+}
 
-	/*
-	 * 12GB/s is maximum BW supported by single DBuf slice.
-	 *
-	 * FIXME dbuf slice code is broken:
-	 * - must wait for planes to stop using the slice before powering it off
-	 * - plane straddling both slices is illegal in multi-pipe scenarios
-	 * - should validate we stay within the hw bandwidth limits
-	 */
-	if (0 && (num_active > 1 || total_data_bw >= GBps(12))) {
-		ddb->enabled_slices = 2;
-	} else {
-		ddb->enabled_slices = 1;
-		ddb_size /= 2;
-	}
+/*
+ * Calculate initial DBuf slice offset, based on slice size
+ * and mask(i.e if slice size is 1024 and second slice is enabled
+ * offset would be 1024)
+ */
+static u32 skl_get_first_dbuf_slice_offset(u32 dbuf_slice_mask,
+					   u32 slice_size, u32 ddb_size)
+{
+	u32 offset = 0;
 
-	return ddb_size;
+	if (!dbuf_slice_mask)
+		return 0;
+
+	while (!(dbuf_slice_mask & 1)) {
+		dbuf_slice_mask >>= 1;
+		offset += slice_size;
+		if (offset >= ddb_size)
+			break;
+	}
+	WARN_ON(offset >= ddb_size);
+	return offset;
 }
 
+static u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
+				      int pipe, u32 active_pipes,
+				      const struct intel_crtc_state *crtc_state);
+
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
 				   const struct intel_crtc_state *crtc_state,
@@ -3857,7 +3861,14 @@  skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
 	u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
 	enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
 	u16 ddb_size;
+	u32 ddb_range_size;
 	u32 i;
+	u32 dbuf_slice_mask;
+	u32 active_pipes;
+	u32 offset;
+	u32 slice_size;
+	u32 total_slice_mask;
+	u32 start, end;
 
 	if (WARN_ON(!state) || !crtc_state->hw.active) {
 		alloc->start = 0;
@@ -3866,14 +3877,23 @@  skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
 		return;
 	}
 
-	if (intel_state->active_pipe_changes)
+	if (intel_state->active_pipe_changes) {
 		*num_active = hweight8(intel_state->active_pipes);
-	else
+		active_pipes = intel_state->active_pipes;
+	} else {
 		*num_active = hweight8(dev_priv->active_pipes);
+		active_pipes = dev_priv->active_pipes;
+	}
 
 	ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate,
 				      *num_active, ddb);
 
+	DRM_DEBUG_KMS("Got total ddb size %d\n", ddb_size);
+
+	slice_size = ddb_size / ddb->supported_slices;
+
+	DRM_DEBUG_KMS("Got DBuf slice size %d\n", slice_size);
+
 	/*
 	 * If the state doesn't change the active CRTC's or there is no
 	 * modeset request, then there's no need to recalculate;
@@ -3891,20 +3911,73 @@  skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
 		return;
 	}
 
+	/*
+	 * Get allowed DBuf slices for correspondent pipe and platform.
+	 */
+	dbuf_slice_mask = i915_get_allowed_dbuf_mask(dev_priv, for_pipe,
+						     active_pipes, crtc_state);
+
+	DRM_DEBUG_KMS("DBuf slice mask %x pipe %d active pipes %x\n",
+		      dbuf_slice_mask,
+		      for_pipe, active_pipes);
+
+	/*
+	 * Figure out at which DBuf slice we start, i.e if we start at Dbuf S2
+	 * and slice size is 1024, the offset would be 1024
+	 */
+	offset = skl_get_first_dbuf_slice_offset(dbuf_slice_mask,
+						 slice_size, ddb_size);
+
+	/*
+	 * Figure out total size of allowed DBuf slices, which is basically
+	 * a number of allowed slices for that pipe multiplied by slice size.
+	 * We might still have some dbuf slices disabled in case if those
+	 * are not needed based on bandwidth requirements and num_active pipes,
+	 * so stick to real ddb size if it happens to be less. Inside of this
+	 * range ddb entries are still allocated in proportion to display width.
+	 */
+	ddb_range_size = min(hweight8(dbuf_slice_mask) * slice_size,
+			     (unsigned int)ddb_size);
+
 	/*
 	 * Watermark/ddb requirement highly depends upon width of the
 	 * framebuffer, So instead of allocating DDB equally among pipes
 	 * distribute DDB based on resolution/width of the display.
 	 */
+	total_slice_mask = dbuf_slice_mask;
 	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
 		const struct drm_display_mode *adjusted_mode =
 			&crtc_state->hw.adjusted_mode;
 		enum pipe pipe = crtc->pipe;
 		int hdisplay, vdisplay;
+		u32 pipe_dbuf_slice_mask = \
+			i915_get_allowed_dbuf_mask(dev_priv,
+						pipe,
+						active_pipes,
+						crtc_state);
 
 		if (!crtc_state->hw.enable)
 			continue;
 
+		/*
+		 * According to BSpec pipe can share one dbuf slice with another
+		 * pipes or pipe can use multiple dbufs, in both cases we
+		 * account for other pipes only if they have exactly same mask.
+		 * However we need to account how many slices we should enable
+		 * in total.
+		 */
+		total_slice_mask |= pipe_dbuf_slice_mask;
+
+		/*
+		 * Do not account pipes using other slice sets
+		 * luckily as of current BSpec slice sets do not partially
+		 * intersect(pipes share either same one slice or same slice set
+		 * i.e no partial intersection), so it is enough to check for
+		 * equality for now.
+		 */
+		if (dbuf_slice_mask != pipe_dbuf_slice_mask)
+			continue;
+
 		drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
 		total_width += hdisplay;
 
@@ -3914,8 +3987,19 @@  skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv,
 			pipe_width = hdisplay;
 	}
 
-	alloc->start = ddb_size * width_before_pipe / total_width;
-	alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
+	ddb->enabled_slices = total_slice_mask;
+
+	start = ddb_range_size * width_before_pipe / total_width;
+	end = ddb_range_size * (width_before_pipe + pipe_width) / total_width;
+
+	alloc->start = offset + start;
+	alloc->end = offset + end;
+
+	DRM_DEBUG_KMS("Pipe %d ddb %d-%d\n", for_pipe,
+		      alloc->start, alloc->end);
+	DRM_DEBUG_KMS("Enabled ddb slices mask %x num supported %d\n",
+		      ddb->enabled_slices,
+		      ddb->supported_slices);
 }
 
 static int skl_compute_wm_params(const struct intel_crtc_state *crtc_state,
@@ -4036,7 +4120,8 @@  void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 			  struct skl_ddb_allocation *ddb /* out */)
 {
-	ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
+	ddb->enabled_slices = intel_enabled_dbuf_slices(dev_priv);
+	DRM_DEBUG_KMS("Got hw dbuf slices mask %x\n", ddb->enabled_slices);
 }
 
 /*
@@ -4164,6 +4249,238 @@  skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
 	return total_data_rate;
 }
 
+uint_fixed_16_16_t
+skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
+{
+	u32 src_w, src_h, dst_w, dst_h;
+	uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
+	uint_fixed_16_16_t downscale_h, downscale_w;
+	const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode;
+
+	src_w = crtc_state->pipe_src_w;
+	src_h = crtc_state->pipe_src_h;
+	dst_w = adjusted_mode->crtc_hdisplay;
+	dst_h = adjusted_mode->crtc_vdisplay;
+
+	fp_w_ratio = div_fixed16(src_w, dst_w);
+	fp_h_ratio = div_fixed16(src_h, dst_h);
+	downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
+	downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
+
+	return mul_fixed16(downscale_w, downscale_h);
+}
+
+uint_fixed_16_16_t
+icl_get_pipe_ratio(const struct intel_crtc_state *crtc_state)
+{
+	struct drm_plane *plane;
+	const struct drm_plane_state *drm_plane_state;
+	uint_fixed_16_16_t pipe_downscale, pipe_ratio;
+	uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+
+	if (!crtc_state->uapi.enable)
+		return max_downscale;
+
+	drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->uapi) {
+		uint_fixed_16_16_t plane_downscale;
+		const struct intel_plane_state *plane_state =
+			to_intel_plane_state(drm_plane_state);
+
+		if (!intel_wm_plane_visible(crtc_state, plane_state))
+			continue;
+
+		plane_downscale = skl_plane_downscale_amount(crtc_state, plane_state);
+
+		DRM_DEBUG_KMS("Plane %d downscale amount %d.%d\n",
+			      to_intel_plane(plane)->id,
+			      plane_downscale.val >> 16,
+			      plane_downscale.val & 0xffff);
+
+		max_downscale = max_fixed16(plane_downscale, max_downscale);
+	}
+
+
+	pipe_downscale = skl_pipe_downscale_amount(crtc_state);
+
+	DRM_DEBUG_KMS("Pipe %d downscale amount %d.%d\n",
+		       crtc->pipe, pipe_downscale.val >> 16,
+		       pipe_downscale.val & 0xffff);
+
+	pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
+
+	/* Convert result to percentage */
+	pipe_ratio = u32_to_fixed16(100 * div_round_up_u32_fixed16(1, pipe_downscale));
+
+	DRM_DEBUG_KMS("Pipe %d ratio %d.%d\n", crtc->pipe,
+		      pipe_ratio.val >> 16, pipe_ratio.val & 0xffff);
+
+	return pipe_ratio;
+}
+
+struct dbuf_slice_conf_entry {
+	u32 dbuf_mask[I915_MAX_PIPES];
+	u32 active_pipes;
+};
+
+
+#define ICL_PIPE_A_DBUF_SLICES(DBuf1)  \
+	{ { DBuf1, 0, 0, 0 }, BIT(PIPE_A) }
+#define ICL_PIPE_B_DBUF_SLICES(DBuf1)  \
+	{ { 0, DBuf1, 0, 0 }, BIT(PIPE_B) }
+#define ICL_PIPE_C_DBUF_SLICES(DBuf1)  \
+	{ { 0, 0, DBuf1, 0 }, BIT(PIPE_C) }
+#define ICL_PIPE_D_DBUF_SLICES(DBuf1)  \
+	{ { 0, 0, 0, DBuf1 }, BIT(PIPE_D) }
+#define ICL_PIPE_AB_DBUF_SLICES(DBuf1, DBuf2)   \
+	{ { DBuf1, DBuf2, 0, 0 }, BIT(PIPE_A) | BIT(PIPE_B) }
+#define ICL_PIPE_BC_DBUF_SLICES(DBuf1, DBuf2)   \
+	{ { 0, DBuf1, DBuf2, 0 }, BIT(PIPE_B) | BIT(PIPE_C) }
+#define ICL_PIPE_BD_DBUF_SLICES(DBuf1, DBuf2)   \
+	{ { 0, DBuf1, 0, DBuf2 }, BIT(PIPE_B) | BIT(PIPE_D) }
+#define ICL_PIPE_AC_DBUF_SLICES(DBuf1, DBuf2)   \
+	{ { DBuf1, 0, DBuf2, 0 }, BIT(PIPE_A) | BIT(PIPE_C) }
+#define ICL_PIPE_AD_DBUF_SLICES(DBuf1, DBuf2)   \
+	{ { DBuf1, 0, 0, DBuf2 }, BIT(PIPE_A) | BIT(PIPE_D) }
+#define ICL_PIPE_CD_DBUF_SLICES(DBuf1, DBuf2)   \
+	{ { 0, 0, DBuf1, DBuf2 }, BIT(PIPE_C) | BIT(PIPE_D) }
+#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
+	{ { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) }
+#define ICL_PIPE_ACD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
+	{ { DBuf1, 0, DBuf2, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_C) | BIT(PIPE_D) }
+#define ICL_PIPE_BCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
+	{ { 0, DBuf1, DBuf2, DBuf3 }, BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D) }
+#define ICL_PIPE_ABD_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
+	{ { DBuf1, DBuf2, 0, DBuf3 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_D) }
+#define ICL_PIPE_ABC_DBUF_SLICES(DBuf1, DBuf2, DBuf3)  \
+	{ { DBuf1, DBuf2, DBuf3, 0 }, BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) }
+#define ICL_PIPE_ABCD_DBUF_SLICES(DBuf1, DBuf2, DBuf3, DBuf4)  \
+	{ { DBuf1, DBuf2, DBuf3, DBuf4 }, BIT(PIPE_A) | BIT(PIPE_B) | \
+					  BIT(PIPE_C) | BIT(PIPE_D) }
+
+/*
+ * Table taken from Bspec 12716
+ * Pipes do have some preferred DBuf slice affinity,
+ * plus there are some hardcoded requirements on how
+ * those should be distributed for multipipe scenarios.
+ * For more DBuf slices algorithm can get even more messy
+ * and less readable, so decided to use a table almost
+ * as is from BSpec itself - that way it is at least easier
+ * to compare, change and check.
+ */
+struct dbuf_slice_conf_entry icl_allowed_dbufs[] = {
+{ { 0, 0, 0, 0 }, 0 },
+ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
+ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT),                /* for pipe ratio >= 88.8 */
+ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
+ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT),                /* for pipe ratio >= 88.8 */
+ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),  /* pipe ratio < 88.8 */
+ICL_PIPE_C_DBUF_SLICES(DBUF_S2_BIT),                /* for pipe ratio >= 88.8 */
+ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
+};
+
+/*
+ * Table taken from Bspec 49255
+ * Pipes do have some preferred DBuf slice affinity,
+ * plus there are some hardcoded requirements on how
+ * those should be distributed for multipipe scenarios.
+ * For more DBuf slices algorithm can get even more messy
+ * and less readable, so decided to use a table almost
+ * as is from BSpec itself - that way it is at least easier
+ * to compare, change and check.
+ */
+struct dbuf_slice_conf_entry tgl_allowed_dbufs[] = {
+{ { 0, 0, 0, 0 }, 0 },
+ICL_PIPE_A_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
+ICL_PIPE_B_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
+ICL_PIPE_C_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
+ICL_PIPE_D_DBUF_SLICES(DBUF_S1_BIT | DBUF_S2_BIT),
+ICL_PIPE_AB_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_AC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_BC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_AD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_BD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_CD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_ABD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_ABC_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT),
+ICL_PIPE_ACD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
+ICL_PIPE_BCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
+ICL_PIPE_ABCD_DBUF_SLICES(DBUF_S1_BIT, DBUF_S1_BIT, DBUF_S2_BIT, DBUF_S2_BIT),
+};
+
+/*
+ * This function finds an entry with same enabled pipe configuration and
+ * returns correspondent DBuf slice mask as stated in BSpec for particular
+ * platform.
+ */
+static u32 icl_get_allowed_dbuf_mask(int pipe,
+				     u32 active_pipes,
+				     const struct intel_crtc_state *crtc_state)
+{
+	int i;
+	uint_fixed_16_16_t pipe_ratio;
+
+	/*
+	 * Calculate pipe ratio as stated in BSpec 28692
+	 */
+	pipe_ratio = icl_get_pipe_ratio(crtc_state);
+
+	for (i = 0; i < ARRAY_SIZE(icl_allowed_dbufs); i++) {
+		if (icl_allowed_dbufs[i].active_pipes == active_pipes) {
+			if (hweight32(active_pipes) == 1) {
+				/*
+				 * According to BSpec 12716: if ratio >= 88.8
+				 * always use single dbuf slice.
+				 */
+				if (pipe_ratio.val >= div_fixed16(888, 10).val)
+					++i;
+			}
+			return icl_allowed_dbufs[i].dbuf_mask[pipe];
+		}
+	}
+	return 0;
+}
+
+/*
+ * This function finds an entry with same enabled pipe configuration and
+ * returns correspondent DBuf slice mask as stated in BSpec for particular
+ * platform.
+ */
+static u32 tgl_get_allowed_dbuf_mask(int pipe,
+				     u32 active_pipes,
+				     const struct intel_crtc_state *crtc_state)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tgl_allowed_dbufs); i++) {
+		if (tgl_allowed_dbufs[i].active_pipes == active_pipes)
+			return tgl_allowed_dbufs[i].dbuf_mask[pipe];
+	}
+	return 0;
+}
+
+u32 i915_get_allowed_dbuf_mask(struct drm_i915_private *dev_priv,
+				      int pipe, u32 active_pipes,
+				      const struct intel_crtc_state *crtc_state)
+{
+	if (IS_GEN(dev_priv, 11))
+		return icl_get_allowed_dbuf_mask(pipe,
+						 active_pipes,
+						 crtc_state);
+	else if (IS_GEN(dev_priv, 12))
+		return tgl_get_allowed_dbuf_mask(pipe,
+						 active_pipes,
+						 crtc_state);
+	/*
+	 * For anything else just return one slice yet.
+	 * Should be extended for other platforms.
+	 */
+	return DBUF_S1_BIT;
+}
+
 static u64
 icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state,
 				 u64 *plane_data_rate)