[v1] drm/i915: Add TigerLake bandwidth checking
diff mbox series

Message ID 20190917130057.7402-1-stanislav.lisovskiy@intel.com
State New
Headers show
Series
  • [v1] drm/i915: Add TigerLake bandwidth checking
Related show

Commit Message

Lisovskiy, Stanislav Sept. 17, 2019, 1 p.m. UTC
Add the bandwidth calculation algorithm and checks for TigerLake,
in the same way as was done for ICL. Some constants
are corrected according to BSpec.

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=111600
---
 drivers/gpu/drm/i915/display/intel_bw.c | 108 +++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 1 deletion(-)

Comments

Ville Syrjälä Sept. 17, 2019, 1:44 p.m. UTC | #1
On Tue, Sep 17, 2019 at 04:00:57PM +0300, Stanislav Lisovskiy wrote:
> Added bandwidth calculation algorithm and checks,
> similar way as it was done for ICL, some constants
> were corrected according to BSpec.
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> 
> Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=111600
> ---
>  drivers/gpu/drm/i915/display/intel_bw.c | 108 +++++++++++++++++++++++-
>  1 file changed, 107 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
> index 688858ebe4d0..e97d083f4f2a 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.c
> +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> @@ -132,7 +132,8 @@ static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
>  }
>  
>  struct intel_sa_info {
> -	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +	u8 deburst, mpagesize, deprogbwlimit;
> +	u16 displayrtids;

Put the u16 first to avoid holes in the middle of the struct.

>  };
>  
>  static const struct intel_sa_info icl_sa_info = {
> @@ -142,6 +143,14 @@ static const struct intel_sa_info icl_sa_info = {
>  	.displayrtids = 128,
>  };
>  
> +static const struct intel_sa_info tgl_sa_info = {
> +	.deburst = 16,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 34, /* GB/s */
> +	.displayrtids = 256,
> +};
> +
> +
>  static int icl_get_bw_info(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_qgv_info qi = {};
> @@ -208,6 +217,74 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> +static int tgl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_qgv_info qi = {};
> +	const struct intel_sa_info *sa = &tgl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int c3_derating = 10;
> +	int c25_deprogbwpclimit = 60;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &qi);
> +	if (ret) {
> +		DRM_DEBUG_KMS("Failed to get memory subsystem information, ignoring bandwidth limits");
> +		return ret;
> +	}
> +	num_channels = qi.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&qi);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16 * c25_deprogbwpclimit, 100)); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup;
> +
> +		bi->num_qgv_points = qi.num_points;
> +
> +		for (j = 0; j < qi.num_points; j++) {
> +			const struct intel_qgv_point *sp = &qi.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * (100 - c3_derating) / 100); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}

We don't want to duplicate the entire function. Pretty much the
whole point of having the sa_info struct is to make this parametrized.

> +
> +	return 0;
> +}
> +
>  static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
>  			       int num_planes, int qgv_point)
>  {
> @@ -231,10 +308,35 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
>  	return 0;
>  }
>  
> +static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		/*
> +		 * Pcode will not expose all QGV points when
> +		 * SAGV is forced to off/min/med/max.
> +		 */
> +		if (qgv_point >= bi->num_qgv_points)
> +			return UINT_MAX;
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
>  void intel_bw_init_hw(struct drm_i915_private *dev_priv)
>  {
>  	if (IS_GEN(dev_priv, 11))
>  		icl_get_bw_info(dev_priv);
> +	else if (IS_GEN(dev_priv, 12))
> +		tgl_get_bw_info(dev_priv);
>  }
>  
>  static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> @@ -249,6 +351,10 @@ static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
>  		return min3(icl_max_bw(dev_priv, num_planes, 0),
>  			    icl_max_bw(dev_priv, num_planes, 1),
>  			    icl_max_bw(dev_priv, num_planes, 2));
> +	else if (IS_GEN(dev_priv, 12))
> +		return min3(tgl_max_bw(dev_priv, num_planes, 0),
> +			    tgl_max_bw(dev_priv, num_planes, 1),
> +			    tgl_max_bw(dev_priv, num_planes, 2));
>  	else
>  		return UINT_MAX;
>  }
> -- 
> 2.17.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
James Ausmus Sept. 17, 2019, 7:45 p.m. UTC | #2
On Tue, Sep 17, 2019 at 04:00:57PM +0300, Stanislav Lisovskiy wrote:
> Added bandwidth calculation algorithm and checks,
> similar way as it was done for ICL, some constants
> were corrected according to BSpec.

Heh - I'd been working in this same area, and had some code written up,
but your patch made it to the list first. :)

> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> 
> Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=111600
> ---
>  drivers/gpu/drm/i915/display/intel_bw.c | 108 +++++++++++++++++++++++-
>  1 file changed, 107 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
> index 688858ebe4d0..e97d083f4f2a 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.c
> +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> @@ -132,7 +132,8 @@ static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
>  }
>  
>  struct intel_sa_info {
> -	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +	u8 deburst, mpagesize, deprogbwlimit;
> +	u16 displayrtids;
>  };
>  
>  static const struct intel_sa_info icl_sa_info = {
> @@ -142,6 +143,14 @@ static const struct intel_sa_info icl_sa_info = {
>  	.displayrtids = 128,
>  };
>  
> +static const struct intel_sa_info tgl_sa_info = {
> +	.deburst = 16,
> +	.mpagesize = 16,

.mpagesize should be 16 only for DDR4, and 32 otherwise. It's not
actually used anywhere, so it doesn't matter for now, but a comment
would be good in case it needs to be used in the future.

> +	.deprogbwlimit = 34, /* GB/s */
> +	.displayrtids = 256,
> +};
> +
> +
>  static int icl_get_bw_info(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_qgv_info qi = {};
> @@ -208,6 +217,74 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> +static int tgl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_qgv_info qi = {};
> +	const struct intel_sa_info *sa = &tgl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int c3_derating = 10;
> +	int c25_deprogbwpclimit = 60;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &qi);
> +	if (ret) {
> +		DRM_DEBUG_KMS("Failed to get memory subsystem information, ignoring bandwidth limits");
> +		return ret;
> +	}
> +	num_channels = qi.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&qi);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16 * c25_deprogbwpclimit, 100)); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup;
> +
> +		bi->num_qgv_points = qi.num_points;
> +
> +		for (j = 0; j < qi.num_points; j++) {
> +			const struct intel_qgv_point *sp = &qi.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);

qi.t_bl also needs to be set dynamically based on the memory type:
4 for DDR4, 16 otherwise.


-James

> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * (100 - c3_derating) / 100); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
>  static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
>  			       int num_planes, int qgv_point)
>  {
> @@ -231,10 +308,35 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
>  	return 0;
>  }
>  
> +static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		/*
> +		 * Pcode will not expose all QGV points when
> +		 * SAGV is forced to off/min/med/max.
> +		 */
> +		if (qgv_point >= bi->num_qgv_points)
> +			return UINT_MAX;
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
>  void intel_bw_init_hw(struct drm_i915_private *dev_priv)
>  {
>  	if (IS_GEN(dev_priv, 11))
>  		icl_get_bw_info(dev_priv);
> +	else if (IS_GEN(dev_priv, 12))
> +		tgl_get_bw_info(dev_priv);
>  }
>  
>  static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> @@ -249,6 +351,10 @@ static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
>  		return min3(icl_max_bw(dev_priv, num_planes, 0),
>  			    icl_max_bw(dev_priv, num_planes, 1),
>  			    icl_max_bw(dev_priv, num_planes, 2));
> +	else if (IS_GEN(dev_priv, 12))
> +		return min3(tgl_max_bw(dev_priv, num_planes, 0),
> +			    tgl_max_bw(dev_priv, num_planes, 1),
> +			    tgl_max_bw(dev_priv, num_planes, 2));
>  	else
>  		return UINT_MAX;
>  }
> -- 
> 2.17.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 688858ebe4d0..e97d083f4f2a 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -132,7 +132,8 @@  static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
 }
 
 struct intel_sa_info {
-	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
+	u8 deburst, mpagesize, deprogbwlimit;
+	u16 displayrtids;
 };
 
 static const struct intel_sa_info icl_sa_info = {
@@ -142,6 +143,14 @@  static const struct intel_sa_info icl_sa_info = {
 	.displayrtids = 128,
 };
 
+static const struct intel_sa_info tgl_sa_info = {
+	.deburst = 16,
+	.mpagesize = 16,
+	.deprogbwlimit = 34, /* GB/s */
+	.displayrtids = 256,
+};
+
+
 static int icl_get_bw_info(struct drm_i915_private *dev_priv)
 {
 	struct intel_qgv_info qi = {};
@@ -208,6 +217,74 @@  static int icl_get_bw_info(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+static int tgl_get_bw_info(struct drm_i915_private *dev_priv)
+{
+	struct intel_qgv_info qi = {};
+	const struct intel_sa_info *sa = &tgl_sa_info;
+	bool is_y_tile = true; /* assume y tile may be used */
+	int num_channels;
+	int deinterleave;
+	int ipqdepth, ipqdepthpch;
+	int dclk_max;
+	int maxdebw;
+	int c3_derating = 10;
+	int c25_deprogbwpclimit = 60;
+	int i, ret;
+
+	ret = icl_get_qgv_points(dev_priv, &qi);
+	if (ret) {
+		DRM_DEBUG_KMS("Failed to get memory subsystem information, ignoring bandwidth limits");
+		return ret;
+	}
+	num_channels = qi.num_channels;
+
+	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
+	dclk_max = icl_sagv_max_dclk(&qi);
+
+	ipqdepthpch = 16;
+
+	maxdebw = min(sa->deprogbwlimit * 1000,
+		      icl_calc_bw(dclk_max, 16 * c25_deprogbwpclimit, 100)); /* 60% */
+	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		struct intel_bw_info *bi = &dev_priv->max_bw[i];
+		int clpchgroup;
+		int j;
+
+		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
+		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup;
+
+		bi->num_qgv_points = qi.num_points;
+
+		for (j = 0; j < qi.num_points; j++) {
+			const struct intel_qgv_point *sp = &qi.points[j];
+			int ct, bw;
+
+			/*
+			 * Max row cycle time
+			 *
+			 * FIXME what is the logic behind the
+			 * assumed burst length?
+			 */
+			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
+				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
+			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
+
+			bi->deratedbw[j] = min(maxdebw,
+					       bw * (100 - c3_derating) / 100); /* 90% */
+
+			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
+				      i, j, bi->num_planes, bi->deratedbw[j]);
+		}
+
+		if (bi->num_planes == 1)
+			break;
+	}
+
+	return 0;
+}
+
 static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
 			       int num_planes, int qgv_point)
 {
@@ -231,10 +308,35 @@  static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
 	return 0;
 }
 
+static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
+			       int num_planes, int qgv_point)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		const struct intel_bw_info *bi =
+			&dev_priv->max_bw[i];
+
+		/*
+		 * Pcode will not expose all QGV points when
+		 * SAGV is forced to off/min/med/max.
+		 */
+		if (qgv_point >= bi->num_qgv_points)
+			return UINT_MAX;
+
+		if (num_planes >= bi->num_planes)
+			return bi->deratedbw[qgv_point];
+	}
+
+	return 0;
+}
+
 void intel_bw_init_hw(struct drm_i915_private *dev_priv)
 {
 	if (IS_GEN(dev_priv, 11))
 		icl_get_bw_info(dev_priv);
+	else if (IS_GEN(dev_priv, 12))
+		tgl_get_bw_info(dev_priv);
 }
 
 static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
@@ -249,6 +351,10 @@  static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
 		return min3(icl_max_bw(dev_priv, num_planes, 0),
 			    icl_max_bw(dev_priv, num_planes, 1),
 			    icl_max_bw(dev_priv, num_planes, 2));
+	else if (IS_GEN(dev_priv, 12))
+		return min3(tgl_max_bw(dev_priv, num_planes, 0),
+			    tgl_max_bw(dev_priv, num_planes, 1),
+			    tgl_max_bw(dev_priv, num_planes, 2));
 	else
 		return UINT_MAX;
 }