diff mbox series

[v3,1/4] drm/i915/fbc: Rework cfb stride/size calculations

Message ID 20210923042151.19052-1-ville.syrjala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/4] drm/i915/fbc: Rework cfb stride/size calculations | expand

Commit Message

Ville Syrjälä Sept. 23, 2021, 4:21 a.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

The code to calculate the cfb stride/size is a bit of a mess.
The cfb size is getting calculated based purely on the plane
stride and plane height. That doesn't account for extra
alignment we want for the cfb stride. The gen9 override
stride OTOH is just calculated based on the plane width, and
it does try to make things more aligned but any extra alignment
added there is not considered in the cfb size calculations.
So I'm not at all convinced this is working as intended. Additionally
the compression limit handling is split between the cfb allocation
code and g4x_dpfc_ctl_limit() (for the 16bpp case), which is just
confusing.

Let's streamline the whole thing:
- Start with the plane stride, convert that into cfb stride (cfb is
  always 4 bytes per pixel). All the calculations will assume 1:1
  compression limit since that will give us the max values, and we
  don't yet know how much stolen memory we will be able to allocate
- Align the cfb stride to 512 bytes on modern platforms. This guarantees
  the 4 line segment will be 512 byte aligned regardless of the final
  compression limit we choose later. The 512 byte alignment for the
  segment is required by at least some of the platforms, and just doing
  it always seems like the easiest option
- Figure out if we need to use the override stride or not. For X-tiled
  it's never needed since the plane stride is already 512 byte aligned,
  for Y-tiled it will be needed if the plane stride is not a multiple
  of 512 bytes, and for linear it's apparently always needed because the
  hardware miscalculates the cfb stride as PLANE_STRIDE*512 instead of
  the PLANE_STRIDE*64 that it uses with linear.
- The cfb size will be calculated based on the aligned cfb stride to
  guarantee we actually reserved enough stolen memory and the FBC hw
  won't end up scribbling over whatever else is allocated in stolen
- The compression limit handling we just do fully in the cfb allocation
  code to make things less confusing

v2: Write the min cfb segment stride calculation in a more
    explicit way to make it clear what is going on
v3: Remember to update fbc->limit when changing to 16bpp

Reviewed-by: Uma Shankar <uma.shankar@intel.com> #v2
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_fbc.c | 180 +++++++++++++++--------
 drivers/gpu/drm/i915/i915_drv.h          |   4 +-
 2 files changed, 123 insertions(+), 61 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index b1c1a23c36be..e3934424040b 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -62,19 +62,76 @@  static void intel_fbc_get_plane_source_size(const struct intel_fbc_state_cache *
 		*height = cache->plane.src_h;
 }
 
-static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
-					const struct intel_fbc_state_cache *cache)
+/* plane stride in pixels */
+static unsigned int intel_fbc_plane_stride(const struct intel_plane_state *plane_state)
 {
-	int lines;
+	const struct drm_framebuffer *fb = plane_state->hw.fb;
+	unsigned int stride;
+
+	stride = plane_state->view.color_plane[0].stride;
+	if (!drm_rotation_90_or_270(plane_state->hw.rotation))
+		stride /= fb->format->cpp[0];
+
+	return stride;
+}
+
+/* plane stride based cfb stride in bytes, assuming 1:1 compression limit */
+static unsigned int _intel_fbc_cfb_stride(const struct intel_fbc_state_cache *cache)
+{
+	unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
+
+	return cache->fb.stride * cpp;
+}
+
+/* minimum acceptable cfb stride in bytes, assuming 1:1 compression limit */
+static unsigned int skl_fbc_min_cfb_stride(const struct intel_fbc_state_cache *cache)
+{
+	unsigned int limit = 4; /* 1:4 compression limit is the worst case */
+	unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
+	unsigned int height = 4; /* FBC segment is 4 lines */
+	unsigned int stride;
+
+	/* minimum segment stride we can use */
+	stride = cache->plane.src_w * cpp * height / limit;
+
+	/*
+	 * At least some of the platforms require each 4 line segment to
+	 * be 512 byte aligned. Just do it always for simplicity.
+	 */
+	stride = ALIGN(stride, 512);
+
+	/* convert back to single line equivalent with 1:1 compression limit */
+	return stride * limit / height;
+}
+
+/* properly aligned cfb stride in bytes, assuming 1:1 compression limit */
+static unsigned int intel_fbc_cfb_stride(struct drm_i915_private *i915,
+					 const struct intel_fbc_state_cache *cache)
+{
+	unsigned int stride = _intel_fbc_cfb_stride(cache);
+
+	/*
+	 * At least some of the platforms require each 4 line segment to
+	 * be 512 byte aligned. Aligning each line to 512 bytes guarantees
+	 * that regardless of the compression limit we choose later.
+	 */
+	if (DISPLAY_VER(i915) == 9)
+		return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(cache));
+	else
+		return stride;
+}
+
+static unsigned int intel_fbc_cfb_size(struct drm_i915_private *dev_priv,
+				       const struct intel_fbc_state_cache *cache)
+{
+	int lines = cache->plane.src_h;
 
-	intel_fbc_get_plane_source_size(cache, NULL, &lines);
 	if (DISPLAY_VER(dev_priv) == 7)
 		lines = min(lines, 2048);
 	else if (DISPLAY_VER(dev_priv) >= 8)
 		lines = min(lines, 2560);
 
-	/* Hardware needs the full buffer stride, not just the active area. */
-	return lines * cache->fb.stride;
+	return lines * intel_fbc_cfb_stride(dev_priv, cache);
 }
 
 static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv)
@@ -150,15 +207,9 @@  static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv)
 
 static u32 g4x_dpfc_ctl_limit(struct drm_i915_private *i915)
 {
-	const struct intel_fbc_reg_params *params = &i915->fbc.params;
-	int limit = i915->fbc.limit;
-
-	if (params->fb.format->cpp[0] == 2)
-		limit <<= 1;
-
-	switch (limit) {
+	switch (i915->fbc.limit) {
 	default:
-		MISSING_CASE(limit);
+		MISSING_CASE(i915->fbc.limit);
 		fallthrough;
 	case 1:
 		return DPFC_CTL_LIMIT_1X;
@@ -301,7 +352,8 @@  static bool ilk_fbc_is_active(struct drm_i915_private *dev_priv)
 
 static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 {
-	struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	const struct intel_fbc_reg_params *params = &fbc->params;
 	u32 dpfc_ctl;
 
 	/* Display WA #0529: skl, kbl, bxt. */
@@ -310,7 +362,7 @@  static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 
 		if (params->override_cfb_stride)
 			val |= CHICKEN_FBC_STRIDE_OVERRIDE |
-				CHICKEN_FBC_STRIDE(params->override_cfb_stride);
+				CHICKEN_FBC_STRIDE(params->override_cfb_stride / fbc->limit);
 
 		intel_de_rmw(dev_priv, CHICKEN_MISC_4,
 			     CHICKEN_FBC_STRIDE_OVERRIDE |
@@ -443,7 +495,12 @@  static u64 intel_fbc_stolen_end(struct drm_i915_private *dev_priv)
 	return min(end, intel_fbc_cfb_base_max(dev_priv));
 }
 
-static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
+static int intel_fbc_min_limit(int fb_cpp)
+{
+	return fb_cpp == 2 ? 2 : 1;
+}
+
+static int intel_fbc_max_limit(struct drm_i915_private *dev_priv)
 {
 	/*
 	 * FIXME: FBC1 can have arbitrary cfb stride,
@@ -457,16 +514,17 @@  static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
 		return 1;
 
 	/* FBC2 can only do 1:1, 1:2, 1:4 */
-	return fb_cpp == 2 ? 2 : 4;
+	return 4;
 }
 
 static int find_compression_limit(struct drm_i915_private *dev_priv,
-				  unsigned int size,
-				  unsigned int fb_cpp)
+				  unsigned int size, int min_limit)
 {
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	u64 end = intel_fbc_stolen_end(dev_priv);
-	int ret, limit = 1;
+	int ret, limit = min_limit;
+
+	size /= limit;
 
 	/* Try to over-allocate to reduce reallocations and fragmentation. */
 	ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc->compressed_fb,
@@ -474,7 +532,7 @@  static int find_compression_limit(struct drm_i915_private *dev_priv,
 	if (ret == 0)
 		return limit;
 
-	for (; limit <= intel_fbc_max_limit(dev_priv, fb_cpp); limit <<= 1) {
+	for (; limit <= intel_fbc_max_limit(dev_priv); limit <<= 1) {
 		ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc->compressed_fb,
 							   size >>= 1, 4096, 0, end);
 		if (ret == 0)
@@ -485,7 +543,7 @@  static int find_compression_limit(struct drm_i915_private *dev_priv,
 }
 
 static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
-			       unsigned int size, unsigned int fb_cpp)
+			       unsigned int size, int min_limit)
 {
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	int ret;
@@ -502,13 +560,12 @@  static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
 			goto err;
 	}
 
-	ret = find_compression_limit(dev_priv, size, fb_cpp);
+	ret = find_compression_limit(dev_priv, size, min_limit);
 	if (!ret)
 		goto err_llb;
-	else if (ret > 1) {
+	else if (ret > min_limit)
 		drm_info_once(&dev_priv->drm,
 			      "Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
-	}
 
 	fbc->limit = ret;
 
@@ -719,11 +776,7 @@  static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
 
 	cache->fb.format = fb->format;
 	cache->fb.modifier = fb->modifier;
-
-	/* FIXME is this correct? */
-	cache->fb.stride = plane_state->view.color_plane[0].stride;
-	if (drm_rotation_90_or_270(plane_state->hw.rotation))
-		cache->fb.stride *= fb->format->cpp[0];
+	cache->fb.stride = intel_fbc_plane_stride(plane_state);
 
 	/* FBC1 compression interval: arbitrary choice of 1 second */
 	cache->interval = drm_mode_vrefresh(&crtc_state->hw.adjusted_mode);
@@ -746,27 +799,29 @@  static bool intel_fbc_cfb_size_changed(struct drm_i915_private *dev_priv)
 {
 	struct intel_fbc *fbc = &dev_priv->fbc;
 
-	return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) >
+	return intel_fbc_cfb_size(dev_priv, &fbc->state_cache) >
 		fbc->compressed_fb.size * fbc->limit;
 }
 
-static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv)
+static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv,
+					 const struct intel_fbc_state_cache *cache)
 {
-	struct intel_fbc *fbc = &dev_priv->fbc;
-	struct intel_fbc_state_cache *cache = &fbc->state_cache;
+	unsigned int stride = _intel_fbc_cfb_stride(cache);
+	unsigned int stride_aligned = intel_fbc_cfb_stride(dev_priv, cache);
 
-	if ((DISPLAY_VER(dev_priv) == 9) &&
-	    cache->fb.modifier != I915_FORMAT_MOD_X_TILED)
-		return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->limit) * 8;
-	else
-		return 0;
-}
+	/*
+	 * Override stride in 64 byte units per 4 line segment.
+	 *
+	 * Gen9 hw miscalculates cfb stride for linear as
+	 * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so
+	 * we always need to use the override there.
+	 */
+	if (stride != stride_aligned ||
+	    (DISPLAY_VER(dev_priv) == 9 &&
+	     cache->fb.modifier == DRM_FORMAT_MOD_LINEAR))
+		return stride_aligned * 4 / 64;
 
-static bool intel_fbc_override_cfb_stride_changed(struct drm_i915_private *dev_priv)
-{
-	struct intel_fbc *fbc = &dev_priv->fbc;
-
-	return fbc->params.override_cfb_stride != intel_fbc_override_cfb_stride(dev_priv);
+	return 0;
 }
 
 static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv)
@@ -861,7 +916,8 @@  static bool intel_fbc_can_activate(struct intel_crtc *crtc)
 		return false;
 	}
 
-	if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) {
+	if (!stride_is_valid(dev_priv, cache->fb.modifier,
+			     cache->fb.stride * cache->fb.format->cpp[0])) {
 		fbc->no_fbc_reason = "framebuffer stride not supported";
 		return false;
 	}
@@ -949,9 +1005,9 @@  static void intel_fbc_get_reg_params(struct intel_crtc *crtc,
 	params->fb.modifier = cache->fb.modifier;
 	params->fb.stride = cache->fb.stride;
 
-	params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
-
-	params->override_cfb_stride = cache->override_cfb_stride;
+	params->cfb_stride = intel_fbc_cfb_stride(dev_priv, cache);
+	params->cfb_size = intel_fbc_cfb_size(dev_priv, cache);
+	params->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv, cache);
 
 	params->plane_visible = cache->plane.visible;
 }
@@ -982,10 +1038,13 @@  static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state *crtc_state)
 	if (params->fb.stride != cache->fb.stride)
 		return false;
 
-	if (params->cfb_size != intel_fbc_calculate_cfb_size(dev_priv, cache))
+	if (params->cfb_stride != intel_fbc_cfb_stride(dev_priv, cache))
 		return false;
 
-	if (params->override_cfb_stride != cache->override_cfb_stride)
+	if (params->cfb_size != intel_fbc_cfb_size(dev_priv, cache))
+		return false;
+
+	if (params->override_cfb_stride != intel_fbc_override_cfb_stride(dev_priv, cache))
 		return false;
 
 	return true;
@@ -1250,16 +1309,22 @@  static void intel_fbc_enable(struct intel_atomic_state *state,
 		intel_atomic_get_new_plane_state(state, plane);
 	struct intel_fbc *fbc = &dev_priv->fbc;
 	struct intel_fbc_state_cache *cache = &fbc->state_cache;
+	int min_limit;
 
 	if (!plane->has_fbc || !plane_state)
 		return;
 
+	min_limit = intel_fbc_min_limit(plane_state->hw.fb ?
+					plane_state->hw.fb->format->cpp[0] : 0);
+
 	mutex_lock(&fbc->lock);
 
 	if (fbc->crtc) {
-		if (fbc->crtc != crtc ||
-		    (!intel_fbc_cfb_size_changed(dev_priv) &&
-		     !intel_fbc_override_cfb_stride_changed(dev_priv)))
+		if (fbc->crtc != crtc)
+			goto out;
+
+		if (fbc->limit >= min_limit &&
+		    !intel_fbc_cfb_size_changed(dev_priv))
 			goto out;
 
 		__intel_fbc_disable(dev_priv);
@@ -1274,15 +1339,12 @@  static void intel_fbc_enable(struct intel_atomic_state *state,
 		goto out;
 
 	if (intel_fbc_alloc_cfb(dev_priv,
-				intel_fbc_calculate_cfb_size(dev_priv, cache),
-				plane_state->hw.fb->format->cpp[0])) {
+				intel_fbc_cfb_size(dev_priv, cache), min_limit)) {
 		cache->plane.visible = false;
 		fbc->no_fbc_reason = "not enough stolen memory";
 		goto out;
 	}
 
-	cache->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv);
-
 	drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n",
 		    pipe_name(crtc->pipe));
 	fbc->no_fbc_reason = "FBC enabled but not active yet\n";
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cc355aa05dbf..804c2a470e94 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -453,7 +453,6 @@  struct intel_fbc {
 		} fb;
 
 		unsigned int fence_y_offset;
-		u16 override_cfb_stride;
 		u16 interval;
 		s8 fence_id;
 		bool psr2_active;
@@ -478,7 +477,8 @@  struct intel_fbc {
 			u64 modifier;
 		} fb;
 
-		int cfb_size;
+		unsigned int cfb_stride;
+		unsigned int cfb_size;
 		unsigned int fence_y_offset;
 		u16 override_cfb_stride;
 		u16 interval;