diff mbox series

[v3,2/2] drm/i915: Make sure we have enough memory bandwidth on ICL

Message ID 20190503190831.1759-2-ville.syrjala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register | expand

Commit Message

Ville Syrjälä May 3, 2019, 7:08 p.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

ICL has so many planes that it can easily exceed the maximum
effective memory bandwidth of the system. We must therefore check
that we don't exceed that limit.

The algorithm is very magic number heavy and lacks sufficient
explanation for now. We also have no sane way to query the
memory clock and timings, so we must rely on a combination of
raw readout from the memory controller and hardcoded assumptions.
The memory controller values obviously change as the system
jumps between the different SAGV points, so we try to stabilize
it first by disabling SAGV for the duration of the readout.

The utilized bandwidth is tracked via a device wide atomic
private object. That is actually not robust because we can't
afford to enforce strict global ordering between the pipes.
Thus I think I'll need to change this to simply chop up the
available bandwidth between all the active pipes. Each pipe
can then do whatever it wants as long as it doesn't exceed
its budget. That scheme will also require that we assume that
any number of planes could be active at any time.

TODO: make it robust and deal with all the open questions

v2: Sleep longer after disabling SAGV
v3: Poll for the dclk to get raised (seen it take 250ms!)
    If the system has 2133MT/s memory then we pointlessly
    wait one full second :(
v4: Use the new pcode interface to get the qgv points rather
    that using hardcoded numbers

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile             |   1 +
 drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h           |  10 +
 drivers/gpu/drm/i915/i915_reg.h           |   3 +
 drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
 drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
 drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
 drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
 drivers/gpu/drm/i915/intel_display.c      |  40 +++-
 drivers/gpu/drm/i915/intel_drv.h          |   2 +
 10 files changed, 533 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/intel_bw.c
 create mode 100644 drivers/gpu/drm/i915/intel_bw.h

Comments

Clint Taylor May 6, 2019, 10:38 p.m. UTC | #1
On 5/3/19 12:08 PM, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
>
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
>
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
>
> TODO: make it robust and deal with all the open questions

TODO: Add comments detailing structures

>
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>      If the system has 2133MT/s memory then we pointlessly
>      wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>      that using hardcoded numbers
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile             |   1 +
>   drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_drv.h           |  10 +
>   drivers/gpu/drm/i915/i915_reg.h           |   3 +
>   drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>   drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>   drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>   drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>   drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>   drivers/gpu/drm/i915/intel_drv.h          |   2 +
>   10 files changed, 533 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>   create mode 100644 drivers/gpu/drm/i915/intel_bw.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>   	  intel_atomic.o \
>   	  intel_atomic_plane.o \
>   	  intel_bios.o \
> +	  intel_bw.o \
>   	  intel_cdclk.o \
>   	  intel_color.o \
>   	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>   #include "intel_overlay.h"
>   #include "intel_pipe_crc.h"
>   #include "intel_pm.h"
> +#include "intel_sideband.h"
>   #include "intel_sprite.h"
>   #include "intel_uc.h"
>   
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
>   	return 0;
>   }
>   
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}
> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};

intel_sa_info? Doesn't seem very descriptive. also very close to 
intel_sagv_info

> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp = &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}
> +
>   static void
>   intel_get_dram_info(struct drm_i915_private *dev_priv)
>   {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>   	 */
>   	intel_get_dram_info(dev_priv);
>   
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>   #include <drm/drm_cache.h>
>   #include <drm/drm_util.h>
>   #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>   #include <drm/drm_connector.h>
>   #include <drm/i915_mei_hdcp_interface.h>
>   
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>   		} type;
>   	} dram_info;
>   
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>   	struct i915_runtime_pm runtime_pm;
>   
>   	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>   extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>   extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>   int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>   
>   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>   #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>   #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>   #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
>   #define   GEN6_PCODE_READ_D_COMP		0x10
>   #define   GEN6_PCODE_WRITE_D_COMP		0x11
>   #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
>   	drm_atomic_helper_plane_destroy_state(plane, state);
>   }
>   
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];
> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>   int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
>   					struct intel_crtc_state *new_crtc_state,
>   					const struct intel_plane_state *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>   	new_crtc_state->active_planes &= ~BIT(plane->id);
>   	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>   	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>   	new_plane_state->base.visible = false;
>   
>   	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>   	if (new_plane_state->base.visible || old_plane_state->base.visible)
>   		new_crtc_state->update_planes |= BIT(plane->id);
>   
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>   	return intel_plane_atomic_calc_changes(old_crtc_state,
>   					       &new_crtc_state->base,
>   					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>   
>   extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>   
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state);
>   void intel_update_plane(struct intel_plane *plane,
>   			const struct intel_crtc_state *crtc_state,
>   			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not not cause bandwidth problems.
> +	 */
> +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> +					       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> +				       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state);
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state);
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate, num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +
> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>   #include "intel_acpi.h"
>   #include "intel_atomic.h"
>   #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>   #include "intel_color.h"
>   #include "intel_cdclk.h"
>   #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
>   
>   	intel_set_plane_visible(crtc_state, plane_state, false);
>   	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>   
>   	if (plane->id == PLANE_PRIMARY)
>   		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>   	struct intel_encoder *encoder;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>   	enum intel_display_power_domain domain;
>   	struct intel_plane *plane;
>   	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>   	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>   	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>   	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>   }
>   
>   /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
>   	if (!is_crtc_enabled) {
>   		plane_state->visible = visible = false;
>   		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
>   	}
>   
>   	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
>   		return ret;
>   
>   	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
This should be before calc_watermark_data. If we don't have the 
bandwidth why calculate the WMs.
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>   }
>   
>   static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
>   
>   	drm_mode_config_init(dev);
>   
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>   	dev->mode_config.min_width = 0;
>   	dev->mode_config.min_height = 0;
>   
> @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>   	drm_connector_list_iter_end(&conn_iter);
>   
>   	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>   		struct intel_crtc_state *crtc_state =
>   			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>   		int min_cdclk = 0;
>   
>   		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>   		dev_priv->min_voltage_level[crtc->pipe] =
>   			crtc_state->min_voltage_level;
>   
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane->base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>   		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>   
>   	struct intel_crtc_wm_state wm;
>   
> +	u32 data_rate[I915_MAX_PLANES];
> +
>   	/* Gamma mode programmed on the pipe */
>   	u32 gamma_mode;
>
Ville Syrjälä May 7, 2019, 10:20 a.m. UTC | #2
On Mon, May 06, 2019 at 03:38:43PM -0700, Clinton Taylor wrote:
> 
> On 5/3/19 12:08 PM, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >
> > ICL has so many planes that it can easily exceed the maximum
> > effective memory bandwidth of the system. We must therefore check
> > that we don't exceed that limit.
> >
> > The algorithm is very magic number heavy and lacks sufficient
> > explanation for now. We also have no sane way to query the
> > memory clock and timings, so we must rely on a combination of
> > raw readout from the memory controller and hardcoded assumptions.
> > The memory controller values obviously change as the system
> > jumps between the different SAGV points, so we try to stabilize
> > it first by disabling SAGV for the duration of the readout.
> >
> > The utilized bandwidth is tracked via a device wide atomic
> > private object. That is actually not robust because we can't
> > afford to enforce strict global ordering between the pipes.
> > Thus I think I'll need to change this to simply chop up the
> > available bandwidth between all the active pipes. Each pipe
> > can then do whatever it wants as long as it doesn't exceed
> > its budget. That scheme will also require that we assume that
> > any number of planes could be active at any time.
> >
> > TODO: make it robust and deal with all the open questions
> 
> TODO: Add comments detailing structures
> 
> >
> > v2: Sleep longer after disabling SAGV
> > v3: Poll for the dclk to get raised (seen it take 250ms!)
> >      If the system has 2133MT/s memory then we pointlessly
> >      wait one full second :(
> > v4: Use the new pcode interface to get the qgv points rather
> >      that using hardcoded numbers
> >
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/Makefile             |   1 +
> >   drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
> >   drivers/gpu/drm/i915/i915_drv.h           |  10 +
> >   drivers/gpu/drm/i915/i915_reg.h           |   3 +
> >   drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> >   drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> >   drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
> >   drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> >   drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> >   drivers/gpu/drm/i915/intel_drv.h          |   2 +
> >   10 files changed, 533 insertions(+), 1 deletion(-)
> >   create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> >   create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> >
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 68106fe35a04..139a0fc19390 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> >   	  intel_atomic.o \
> >   	  intel_atomic_plane.o \
> >   	  intel_bios.o \
> > +	  intel_bw.o \
> >   	  intel_cdclk.o \
> >   	  intel_color.o \
> >   	  intel_combo_phy.o \
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 5ed864752c7b..b7fa7b51c2e2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -70,6 +70,7 @@
> >   #include "intel_overlay.h"
> >   #include "intel_pipe_crc.h"
> >   #include "intel_pm.h"
> > +#include "intel_sideband.h"
> >   #include "intel_sprite.h"
> >   #include "intel_uc.h"
> >   
> > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
> >   	return 0;
> >   }
> >   
> > +struct intel_qgv_point {
> > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > +};
> > +
> > +struct intel_sagv_info {
> > +	struct intel_qgv_point points[3];
> > +	u8 num_points;
> > +	u8 num_channels;
> > +	u8 t_bl;
> > +	enum intel_dram_type dram_type;
> > +};
> > +
> > +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> > +					  struct intel_sagv_info *si)
> > +{
> > +	u32 val = 0;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > +				     &val, NULL);
> > +	if (ret)
> > +		return ret;
> > +
> > +	switch (val & 0xf) {
> > +	case 0:
> > +		si->dram_type = INTEL_DRAM_DDR4;
> > +		break;
> > +	case 1:
> > +		si->dram_type = INTEL_DRAM_DDR3;
> > +		break;
> > +	case 2:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	case 3:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	default:
> > +		MISSING_CASE(val & 0xf);
> > +		break;
> > +	}
> > +
> > +	si->num_channels = (val & 0xf0) >> 4;
> > +	si->num_points = (val & 0xf00) >> 8;
> > +
> > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> > +					 struct intel_qgv_point *sp,
> > +					 int point)
> > +{
> > +	u32 val = 0, val2;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> > +				     &val, &val2);
> > +	if (ret)
> > +		return ret;
> > +
> > +	sp->dclk = val & 0xffff;
> > +	sp->t_rp = (val & 0xff0000) >> 16;
> > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > +
> > +	sp->t_rdpre = val2 & 0xff;
> > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > +
> > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > +			      struct intel_sagv_info *si)
> > +{
> > +	int i, ret;
> > +
> > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > +		si->num_points = ARRAY_SIZE(si->points);
> > +
> > +	for (i = 0; i < si->num_points; i++) {
> > +		struct intel_qgv_point *sp = &si->points[i];
> > +
> > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > +		if (ret)
> > +			return ret;
> > +
> > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> > +			      sp->t_rcd, sp->t_rc);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_calc_bw(int dclk, int num, int den)
> > +{
> > +	/* multiples of 16.666MHz (100/6) */
> > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > +}
> > +
> > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > +{
> > +	u16 dclk = 0;
> > +	int i;
> > +
> > +	for (i = 0; i < si->num_points; i++)
> > +		dclk = max(dclk, si->points[i].dclk);
> > +
> > +	return dclk;
> > +}
> > +
> > +struct intel_sa_info {
> > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > +};
> 
> intel_sa_info?

sa == system agent.

> Doesn't seem very descriptive. also very close to 
> intel_sagv_info

I was meaning to rename intel_sagv_info to intel_qgv_info but
apparently forgot. Oh, I should really add a comment explaining
what qgv stands for.

> 
> > +
> > +static const struct intel_sa_info icl_sa_info = {
> > +	.deburst = 8,
> > +	.mpagesize = 16,
> > +	.deprogbwlimit = 25, /* GB/s */
> > +	.displayrtids = 128,
> > +};
> > +
> > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_sagv_info si = {};
> > +	const struct intel_sa_info *sa = &icl_sa_info;
> > +	bool is_y_tile = true; /* assume y tile may be used */
> > +	int num_channels;
> > +	int deinterleave;
> > +	int ipqdepth, ipqdepthpch;
> > +	int dclk_max;
> > +	int maxdebw;
> > +	int i, ret;
> > +
> > +	ret = icl_get_qgv_points(dev_priv, &si);
> > +	if (ret)
> > +		return ret;
> > +	num_channels = si.num_channels;
> > +
> > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > +	dclk_max = icl_sagv_max_dclk(&si);
> > +
> > +	ipqdepthpch = 16;
> > +
> > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > +		int clpchgroup;
> > +		int j;
> > +
> > +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> > +
> > +		for (j = 0; j < si.num_points; j++) {
> > +			const struct intel_qgv_point *sp = &si.points[j];
> > +			int ct, bw;
> > +
> > +			/*
> > +			 * Max row cycle time
> > +			 *
> > +			 * FIXME what is the logic behind the
> > +			 * assumed burst length?
> > +			 */
> > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> > +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> > +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> > +
> > +			bi->deratedbw[j] = min(maxdebw,
> > +					       bw * 9 / 10); /* 90% */
> > +
> > +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> > +				      i, j, bi->num_planes, bi->deratedbw[j]);
> > +		}
> > +
> > +		if (bi->num_planes == 1)
> > +			break;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> > +			       int num_planes, int qgv_point)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		const struct intel_bw_info *bi =
> > +			&dev_priv->max_bw[i];
> > +
> > +		if (num_planes >= bi->num_planes)
> > +			return bi->deratedbw[qgv_point];
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > +				 int num_planes)
> > +{
> > +	if (IS_ICELAKE(dev_priv))
> > +		/*
> > +		 * FIXME with SAGV disabled maybe we can assume
> > +		 * point 1 will always be used? Seems to match
> > +		 * the behaviour observed in the wild.
> > +		 */
> > +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> > +			    icl_max_bw(dev_priv, num_planes, 1),
> > +			    icl_max_bw(dev_priv, num_planes, 2));
> > +	else
> > +		return UINT_MAX;
> > +}
> > +
> >   static void
> >   intel_get_dram_info(struct drm_i915_private *dev_priv)
> >   {
> > @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
> >   	 */
> >   	intel_get_dram_info(dev_priv);
> >   
> > +	if (INTEL_GEN(dev_priv) >= 11)
> > +		icl_get_bw_info(dev_priv);
> >   
> >   	return 0;
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 64fa353a62bb..d1b9c3fe5802 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -54,6 +54,7 @@
> >   #include <drm/drm_cache.h>
> >   #include <drm/drm_util.h>
> >   #include <drm/drm_dsc.h>
> > +#include <drm/drm_atomic.h>
> >   #include <drm/drm_connector.h>
> >   #include <drm/i915_mei_hdcp_interface.h>
> >   
> > @@ -1837,6 +1838,13 @@ struct drm_i915_private {
> >   		} type;
> >   	} dram_info;
> >   
> > +	struct intel_bw_info {
> > +		int num_planes;
> > +		int deratedbw[3];
> > +	} max_bw[6];
> > +
> > +	struct drm_private_obj bw_obj;
> > +
> >   	struct i915_runtime_pm runtime_pm;
> >   
> >   	struct {
> > @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
> >   extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
> >   extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
> >   int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > +				 int num_planes);
> >   
> >   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index e97c47fca645..399366a41524 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -8774,6 +8774,9 @@ enum {
> >   #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
> >   #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
> >   #define   GEN6_READ_OC_PARAMS			0xc
> > +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> > +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> > +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
> >   #define   GEN6_PCODE_READ_D_COMP		0x10
> >   #define   GEN6_PCODE_WRITE_D_COMP		0x11
> >   #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > index d11681d71add..f142c5c22d7e 100644
> > --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
> >   	drm_atomic_helper_plane_destroy_state(plane, state);
> >   }
> >   
> > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > +				   const struct intel_plane_state *plane_state)
> > +{
> > +	const struct drm_framebuffer *fb = plane_state->base.fb;
> > +	unsigned int cpp = 0;
> > +	int i;
> > +
> > +	if (!plane_state->base.visible)
> > +		return 0;
> > +
> > +	for (i = 0; i < fb->format->num_planes; i++)
> > +		cpp += fb->format->cpp[i];
> > +
> > +	return cpp * crtc_state->pixel_rate;
> > +}
> > +
> >   int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
> >   					struct intel_crtc_state *new_crtc_state,
> >   					const struct intel_plane_state *old_plane_state,
> > @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> >   	new_crtc_state->active_planes &= ~BIT(plane->id);
> >   	new_crtc_state->nv12_planes &= ~BIT(plane->id);
> >   	new_crtc_state->c8_planes &= ~BIT(plane->id);
> > +	new_crtc_state->data_rate[plane->id] = 0;
> >   	new_plane_state->base.visible = false;
> >   
> >   	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> > @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> >   	if (new_plane_state->base.visible || old_plane_state->base.visible)
> >   		new_crtc_state->update_planes |= BIT(plane->id);
> >   
> > +	new_crtc_state->data_rate[plane->id] =
> > +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> > +
> >   	return intel_plane_atomic_calc_changes(old_crtc_state,
> >   					       &new_crtc_state->base,
> >   					       old_plane_state,
> > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > index 14678620440f..0a9651376d0e 100644
> > --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > @@ -15,6 +15,8 @@ struct intel_plane_state;
> >   
> >   extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
> >   
> > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > +				   const struct intel_plane_state *plane_state);
> >   void intel_update_plane(struct intel_plane *plane,
> >   			const struct intel_crtc_state *crtc_state,
> >   			const struct intel_plane_state *plane_state);
> > diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> > new file mode 100644
> > index 000000000000..304bf87f0a2e
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_bw.c
> > @@ -0,0 +1,181 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#include <drm/drm_atomic_state_helper.h>
> > +
> > +#include "intel_bw.h"
> > +#include "intel_drv.h"
> > +
> > +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> > +{
> > +	/*
> > +	 * We assume cursors are small enough
> > +	 * to not not cause bandwidth problems.
> > +	 */
> > +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> > +}
> > +
> > +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +	unsigned int data_rate = 0;
> > +	enum plane_id plane_id;
> > +
> > +	for_each_plane_id_on_crtc(crtc, plane_id) {
> > +		/*
> > +		 * We assume cursors are small enough
> > +		 * to not not cause bandwidth problems.
> > +		 */
> > +		if (plane_id == PLANE_CURSOR)
> > +			continue;
> > +
> > +		data_rate += crtc_state->data_rate[plane_id];
> > +	}
> > +
> > +	return data_rate;
> > +}
> > +
> > +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> > +			  const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +
> > +	bw_state->data_rate[crtc->pipe] =
> > +		intel_bw_crtc_data_rate(crtc_state);
> > +	bw_state->num_active_planes[crtc->pipe] =
> > +		intel_bw_crtc_num_active_planes(crtc_state);
> > +
> > +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > +		      pipe_name(crtc->pipe),
> > +		      bw_state->data_rate[crtc->pipe],
> > +		      bw_state->num_active_planes[crtc->pipe]);
> > +}
> > +
> > +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> > +					       const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int num_active_planes = 0;
> > +	enum pipe pipe;
> > +
> > +	for_each_pipe(dev_priv, pipe)
> > +		num_active_planes += bw_state->num_active_planes[pipe];
> > +
> > +	return num_active_planes;
> > +}
> > +
> > +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> > +				       const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int data_rate = 0;
> > +	enum pipe pipe;
> > +
> > +	for_each_pipe(dev_priv, pipe)
> > +		data_rate += bw_state->data_rate[pipe];
> > +
> > +	return data_rate;
> > +}
> > +
> > +int intel_bw_atomic_check(struct intel_atomic_state *state)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> > +	struct intel_bw_state *bw_state = NULL;
> > +	unsigned int data_rate, max_data_rate;
> > +	unsigned int num_active_planes;
> > +	struct intel_crtc *crtc;
> > +	int i;
> > +
> > +	/* FIXME earlier gens need some checks too */
> > +	if (INTEL_GEN(dev_priv) < 11)
> > +		return 0;
> > +
> > +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> > +					    new_crtc_state, i) {
> > +		unsigned int old_data_rate =
> > +			intel_bw_crtc_data_rate(old_crtc_state);
> > +		unsigned int new_data_rate =
> > +			intel_bw_crtc_data_rate(new_crtc_state);
> > +		unsigned int old_active_planes =
> > +			intel_bw_crtc_num_active_planes(old_crtc_state);
> > +		unsigned int new_active_planes =
> > +			intel_bw_crtc_num_active_planes(new_crtc_state);
> > +
> > +		/*
> > +		 * Avoid locking the bw state when
> > +		 * nothing significant has changed.
> > +		 */
> > +		if (old_data_rate == new_data_rate &&
> > +		    old_active_planes == new_active_planes)
> > +			continue;
> > +
> > +		bw_state  = intel_atomic_get_bw_state(state);
> > +		if (IS_ERR(bw_state))
> > +			return PTR_ERR(bw_state);
> > +
> > +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> > +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> > +
> > +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > +			      pipe_name(crtc->pipe),
> > +			      bw_state->data_rate[crtc->pipe],
> > +			      bw_state->num_active_planes[crtc->pipe]);
> > +	}
> > +
> > +	if (!bw_state)
> > +		return 0;
> > +
> > +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> > +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> > +
> > +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> > +
> > +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> > +
> > +	if (data_rate > max_data_rate) {
> > +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> > +			      data_rate, max_data_rate, num_active_planes);
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> > +{
> > +	struct intel_bw_state *state;
> > +
> > +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> > +	if (!state)
> > +		return NULL;
> > +
> > +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> > +
> > +	return &state->base;
> > +}
> > +
> > +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> > +				   struct drm_private_state *state)
> > +{
> > +	kfree(state);
> > +}
> > +
> > +static const struct drm_private_state_funcs intel_bw_funcs = {
> > +	.atomic_duplicate_state = intel_bw_duplicate_state,
> > +	.atomic_destroy_state = intel_bw_destroy_state,
> > +};
> > +
> > +int intel_bw_init(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_bw_state *state;
> > +
> > +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> > +	if (!state)
> > +		return -ENOMEM;
> > +
> > +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> > +				    &state->base, &intel_bw_funcs);
> > +
> > +	return 0;
> > +}
> > diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> > new file mode 100644
> > index 000000000000..c14272ca5b59
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_bw.h
> > @@ -0,0 +1,46 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#ifndef __INTEL_BW_H__
> > +#define __INTEL_BW_H__
> > +
> > +#include <drm/drm_atomic.h>
> > +
> > +#include "i915_drv.h"
> > +#include "intel_display.h"
> > +
> > +struct drm_i915_private;
> > +struct intel_atomic_state;
> > +struct intel_crtc_state;
> > +
> > +struct intel_bw_state {
> > +	struct drm_private_state base;
> > +
> > +	unsigned int data_rate[I915_MAX_PIPES];
> > +	u8 num_active_planes[I915_MAX_PIPES];
> > +};
> > +
> > +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> > +
> > +static inline struct intel_bw_state *
> > +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > +	struct drm_private_state *bw_state;
> > +
> > +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> > +						    &dev_priv->bw_obj);
> > +	if (IS_ERR(bw_state))
> > +		return ERR_CAST(bw_state);
> > +
> > +	return to_intel_bw_state(bw_state);
> > +}
> > +
> > +int intel_bw_init(struct drm_i915_private *dev_priv);
> > +int intel_bw_atomic_check(struct intel_atomic_state *state);
> > +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> > +			  const struct intel_crtc_state *crtc_state);
> > +
> > +#endif /* __INTEL_BW_H__ */
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index d81ec80e34f6..a955840b73cb 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -50,6 +50,7 @@
> >   #include "intel_acpi.h"
> >   #include "intel_atomic.h"
> >   #include "intel_atomic_plane.h"
> > +#include "intel_bw.h"
> >   #include "intel_color.h"
> >   #include "intel_cdclk.h"
> >   #include "intel_crt.h"
> > @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
> >   
> >   	intel_set_plane_visible(crtc_state, plane_state, false);
> >   	fixup_active_planes(crtc_state);
> > +	crtc_state->data_rate[plane->id] = 0;
> >   
> >   	if (plane->id == PLANE_PRIMARY)
> >   		intel_pre_disable_primary_noatomic(&crtc->base);
> > @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
> >   	struct intel_encoder *encoder;
> >   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >   	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> > +	struct intel_bw_state *bw_state =
> > +		to_intel_bw_state(dev_priv->bw_obj.state);
> >   	enum intel_display_power_domain domain;
> >   	struct intel_plane *plane;
> >   	u64 domains;
> > @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
> >   	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
> >   	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
> >   	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> > +
> > +	bw_state->data_rate[intel_crtc->pipe] = 0;
> > +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
> >   }
> >   
> >   /*
> > @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
> >   	if (!is_crtc_enabled) {
> >   		plane_state->visible = visible = false;
> >   		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> > +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
> >   	}
> >   
> >   	if (!was_visible && !visible)
> > @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
> >   		return ret;
> >   
> >   	intel_fbc_choose_crtc(dev_priv, intel_state);
> > -	return calc_watermark_data(intel_state);
> > +	ret = calc_watermark_data(intel_state);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = intel_bw_atomic_check(intel_state);
> This should be before calc_watermark_data. If we don't have the 
> bandwidth why calculate the WMs.

I wanted to keep it last to minimize the time we hold the global
lock. Although maybe we might want to consider the bw when
calculating the watermarks. There are some bandwidth related
workarounds in the wm code for earlier platforms that might be
relaxed if we had an actual idea on the bw utilization. I'll
have to think about it.

> > +	if (ret)
> > +		return ret;
> > +
> > +	return 0;
> >   }
> >   
> >   static int intel_atomic_prepare_commit(struct drm_device *dev,
> > @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
> >   
> >   	drm_mode_config_init(dev);
> >   
> > +	ret = intel_bw_init(dev_priv);
> > +	if (ret)
> > +		return ret;
> > +
> >   	dev->mode_config.min_width = 0;
> >   	dev->mode_config.min_height = 0;
> >   
> > @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
> >   	drm_connector_list_iter_end(&conn_iter);
> >   
> >   	for_each_intel_crtc(dev, crtc) {
> > +		struct intel_bw_state *bw_state =
> > +			to_intel_bw_state(dev_priv->bw_obj.state);
> >   		struct intel_crtc_state *crtc_state =
> >   			to_intel_crtc_state(crtc->base.state);
> > +		struct intel_plane *plane;
> >   		int min_cdclk = 0;
> >   
> >   		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> > @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
> >   		dev_priv->min_voltage_level[crtc->pipe] =
> >   			crtc_state->min_voltage_level;
> >   
> > +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> > +			const struct intel_plane_state *plane_state =
> > +				to_intel_plane_state(plane->base.state);
> > +
> > +			/*
> > +			 * FIXME don't have the fb yet, so can't
> > +			 * use intel_plane_data_rate() :(
> > +			 */
> > +			if (plane_state->base.visible)
> > +				crtc_state->data_rate[plane->id] =
> > +					4 * crtc_state->pixel_rate;
> > +		}
> > +
> > +		intel_bw_crtc_update(bw_state, crtc_state);
> > +
> >   		intel_pipe_config_sanity_check(dev_priv, crtc_state);
> >   	}
> >   }
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index 4049e03d2c0d..47f551601a05 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -885,6 +885,8 @@ struct intel_crtc_state {
> >   
> >   	struct intel_crtc_wm_state wm;
> >   
> > +	u32 data_rate[I915_MAX_PLANES];
> > +
> >   	/* Gamma mode programmed on the pipe */
> >   	u32 gamma_mode;
> >
Sripada, Radhakrishna May 8, 2019, 9:05 p.m. UTC | #3
On Fri, 2019-05-03 at 22:08 +0300, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
> 
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
> 
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
> 
> TODO: make it robust and deal with all the open questions
> 
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>     If the system has 2133MT/s memory then we pointlessly
>     wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>     that using hardcoded numbers
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile             |   1 +
>  drivers/gpu/drm/i915/i915_drv.c           | 229
> ++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h           |  10 +
>  drivers/gpu/drm/i915/i915_reg.h           |   3 +
>  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>  drivers/gpu/drm/i915/intel_drv.h          |   2 +
>  10 files changed, 533 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile
> b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>  	  intel_atomic.o \
>  	  intel_atomic_plane.o \
>  	  intel_bios.o \
> +	  intel_bw.o \
>  	  intel_cdclk.o \
>  	  intel_color.o \
>  	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>  #include "intel_overlay.h"
>  #include "intel_pipe_crc.h"
>  #include "intel_pm.h"
> +#include "intel_sideband.h"
>  #include "intel_sprite.h"
>  #include "intel_uc.h"
>  
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private
> *dev_priv)
>  	return 0;
>  }
>  
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private
> *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private
> *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
Are we trying to retrieve the dram timing parameters to calculate the
latency? If so can that be seperated as latency calculation instead of
using it under bw info below?
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_IN
> FO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d
> tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp-
> >t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}
> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};
> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave /
> num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup +
> 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp =
> &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd
> +
> +				   (clpchgroup - 1) * si.t_bl + sp-
> >t_rdpre);
For logical flow can we move the above timing related calculations to a
seperate function along with fixme to delink bandwidth and latency
calculations?

- Radhakrishna(RK) Sripada
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 *
> num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d
> deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi-
> >deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}
> +
>  static void
>  intel_get_dram_info(struct drm_i915_private *dev_priv)
>  {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct
> drm_i915_private *dev_priv)
>  	 */
>  	intel_get_dram_info(dev_priv);
>  
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>  #include <drm/drm_cache.h>
>  #include <drm/drm_util.h>
>  #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>  #include <drm/drm_connector.h>
>  #include <drm/i915_mei_hdcp_interface.h>
>  
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>  		} type;
>  	} dram_info;
>  
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>  	struct i915_runtime_pm runtime_pm;
>  
>  	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct
> drm_i915_private *dev_priv);
>  extern unsigned long i915_gfx_val(struct drm_i915_private
> *dev_priv);
>  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>  
>  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private
> *dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>  #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((poin
> t) << 16) | (0x1 << 8))
>  #define   GEN6_PCODE_READ_D_COMP		0x10
>  #define   GEN6_PCODE_WRITE_D_COMP		0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c
> b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane
> *plane,
>  	drm_atomic_helper_plane_destroy_state(plane, state);
>  }
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state
> *crtc_state,
> +				   const struct intel_plane_state
> *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];
> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>  int intel_plane_atomic_check_with_state(const struct
> intel_crtc_state *old_crtc_state,
>  					struct intel_crtc_state
> *new_crtc_state,
>  					const struct intel_plane_state
> *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const
> struct intel_crtc_state *old_crtc_
>  	new_crtc_state->active_planes &= ~BIT(plane->id);
>  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>  	new_plane_state->base.visible = false;
>  
>  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const
> struct intel_crtc_state *old_crtc_
>  	if (new_plane_state->base.visible || old_plane_state-
> >base.visible)
>  		new_crtc_state->update_planes |= BIT(plane->id);
>  
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>  	return intel_plane_atomic_calc_changes(old_crtc_state,
>  					       &new_crtc_state->base,
>  					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h
> b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>  
>  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state
> *crtc_state,
> +				   const struct intel_plane_state
> *plane_state);
>  void intel_update_plane(struct intel_plane *plane,
>  			const struct intel_crtc_state *crtc_state,
>  			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c
> b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct
> intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not not cause bandwidth problems.
> +	 */
Can this assumption be made when the cursor sizes are 256x256(chrome)?
> +	return hweight8(crtc_state->active_planes &
> ~BIT(PLANE_CURSOR));
> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct
> intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct
> drm_i915_private *dev_priv,
> +					       const struct
> intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private
> *dev_priv,
> +				       const struct intel_bw_state
> *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc,
> old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state)
> ;
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state)
> ;
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] =
> new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes
> %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv,
> bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv,
> num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available
> %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate,
> num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct
> drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state-
> >base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h
> b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct
> intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +
> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c
> b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>  #include "intel_acpi.h"
>  #include "intel_atomic.h"
>  #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>  #include "intel_color.h"
>  #include "intel_cdclk.h"
>  #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct
> intel_crtc *crtc,
>  
>  	intel_set_plane_visible(crtc_state, plane_state, false);
>  	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>  
>  	if (plane->id == PLANE_PRIMARY)
>  		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct
> drm_crtc *crtc,
>  	struct intel_encoder *encoder;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>  	enum intel_display_power_domain domain;
>  	struct intel_plane *plane;
>  	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct
> drm_crtc *crtc,
>  	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>  	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>  	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>  }
>  
>  /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const
> struct intel_crtc_state *old_crtc_stat
>  	if (!is_crtc_enabled) {
>  		plane_state->visible = visible = false;
>  		to_intel_crtc_state(crtc_state)->active_planes &=
> ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] =
> 0;
>  	}
>  
>  	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct
> drm_device *dev,
>  		return ret;
>  
>  	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>  }
>  
>  static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device
> *dev)
>  
>  	drm_mode_config_init(dev);
>  
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>  	dev->mode_config.min_width = 0;
>  	dev->mode_config.min_height = 0;
>  
> @@ -16318,8 +16338,11 @@ static void
> intel_modeset_readout_hw_state(struct drm_device *dev)
>  	drm_connector_list_iter_end(&conn_iter);
>  
>  	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>  		struct intel_crtc_state *crtc_state =
>  			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>  		int min_cdclk = 0;
>  
>  		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void
> intel_modeset_readout_hw_state(struct drm_device *dev)
>  		dev_priv->min_voltage_level[crtc->pipe] =
>  			crtc_state->min_voltage_level;
>  
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc,
> plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane-
> >base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>  		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>  
>  	struct intel_crtc_wm_state wm;
>  
> +	u32 data_rate[I915_MAX_PLANES];
> +
>  	/* Gamma mode programmed on the pipe */
>  	u32 gamma_mode;
>
Matt Roper May 11, 2019, 12:42 a.m. UTC | #4
On Fri, May 03, 2019 at 10:08:31PM +0300, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
> 
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
> 
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
> 
> TODO: make it robust and deal with all the open questions
> 
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>     If the system has 2133MT/s memory then we pointlessly
>     wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>     that using hardcoded numbers
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile             |   1 +
>  drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h           |  10 +
>  drivers/gpu/drm/i915/i915_reg.h           |   3 +
>  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>  drivers/gpu/drm/i915/intel_drv.h          |   2 +
>  10 files changed, 533 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>  	  intel_atomic.o \
>  	  intel_atomic_plane.o \
>  	  intel_bios.o \
> +	  intel_bw.o \
>  	  intel_cdclk.o \
>  	  intel_color.o \
>  	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>  #include "intel_overlay.h"
>  #include "intel_pipe_crc.h"
>  #include "intel_pm.h"
> +#include "intel_sideband.h"
>  #include "intel_sprite.h"
>  #include "intel_uc.h"
>  
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}

It might make sense to separate the pcode readout stuff into a patch of
its own.  Aside from the si->t_bl assignment, the functions above are
straightforward details from the pcode HAS.  If we wind up needing to
drop the complicated algorithm below and replace it with a different
one, the pcode readout part won't need to change.

> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};
> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp = &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);

The HAS document uses *64 instead of *32 for the BW calculation here.
Are we doubling the value somewhere else that I'm overlooking?


> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}

Any specific reason reason some of these functions are in i915_drv.c?
Seems like they could just go in the new intel_bw.c.

> +
>  static void
>  intel_get_dram_info(struct drm_i915_private *dev_priv)
>  {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>  	 */
>  	intel_get_dram_info(dev_priv);
>  
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>  #include <drm/drm_cache.h>
>  #include <drm/drm_util.h>
>  #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>  #include <drm/drm_connector.h>
>  #include <drm/i915_mei_hdcp_interface.h>
>  
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>  		} type;
>  	} dram_info;
>  
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>  	struct i915_runtime_pm runtime_pm;
>  
>  	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>  extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>  
>  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>  #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
>  #define   GEN6_PCODE_READ_D_COMP		0x10
>  #define   GEN6_PCODE_WRITE_D_COMP		0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
>  	drm_atomic_helper_plane_destroy_state(plane, state);
>  }
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];

Will this handle NV12 properly?  This will give us 1+2, but the
algorithm document indicates "NV12 should be considered as 4 bytes per
pixel."

> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
>  					struct intel_crtc_state *new_crtc_state,
>  					const struct intel_plane_state *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>  	new_crtc_state->active_planes &= ~BIT(plane->id);
>  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>  	new_plane_state->base.visible = false;
>  
>  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>  	if (new_plane_state->base.visible || old_plane_state->base.visible)
>  		new_crtc_state->update_planes |= BIT(plane->id);
>  
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>  	return intel_plane_atomic_calc_changes(old_crtc_state,
>  					       &new_crtc_state->base,
>  					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>  
>  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state);
>  void intel_update_plane(struct intel_plane *plane,
>  			const struct intel_crtc_state *crtc_state,
>  			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not not cause bandwidth problems.
> +	 */
> +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));

Do we also need to account for NV12 slave planes?  IIRC, they're not set
in the active_planes bitmask, but they're still reading from DRAM, so I
imagine they'd count toward the calculations here?

> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> +					       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> +				       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state);
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state);
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate, num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +

Do we need to grab a lock here?  Otherwise I don't see anything
preventing parallel commits that update disjoint sets of CRTCs 
from exceeding the combined memory bandwidth.

Granted, grabbing a central lock seems really painful since then plane
updates on one CRTC may wind up blocking plane updates on another CRTC.
But I don't know if there's an easy way around that with the algorithm
we've been given either.  Was the mention of potentially chopping up the
bandwidth in the commit message how you plan to address this?  I.e.,
treat it like we do with DDB space on modesets --- since we globally
lock everything on any modeset we can divide it up at that point and
then intra-CRTC updates will only have to worry about their own
allocation thereafter?  That could potentially limit plane usage more
than is actually necessary in some cases; not sure how to weigh the
impact of that vs the downside of stuff like adding/removing/resizing
planes blocking block updates to unrelated CRTC's.



Matt

> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>  #include "intel_acpi.h"
>  #include "intel_atomic.h"
>  #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>  #include "intel_color.h"
>  #include "intel_cdclk.h"
>  #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
>  
>  	intel_set_plane_visible(crtc_state, plane_state, false);
>  	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>  
>  	if (plane->id == PLANE_PRIMARY)
>  		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>  	struct intel_encoder *encoder;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>  	enum intel_display_power_domain domain;
>  	struct intel_plane *plane;
>  	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>  	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>  	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>  	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>  }
>  
>  /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
>  	if (!is_crtc_enabled) {
>  		plane_state->visible = visible = false;
>  		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
>  	}
>  
>  	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
>  		return ret;
>  
>  	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>  }
>  
>  static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
>  
>  	drm_mode_config_init(dev);
>  
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>  	dev->mode_config.min_width = 0;
>  	dev->mode_config.min_height = 0;
>  
> @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>  	drm_connector_list_iter_end(&conn_iter);
>  
>  	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>  		struct intel_crtc_state *crtc_state =
>  			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>  		int min_cdclk = 0;
>  
>  		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>  		dev_priv->min_voltage_level[crtc->pipe] =
>  			crtc_state->min_voltage_level;
>  
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane->base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>  		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>  
>  	struct intel_crtc_wm_state wm;
>  
> +	u32 data_rate[I915_MAX_PLANES];
> +
>  	/* Gamma mode programmed on the pipe */
>  	u32 gamma_mode;
>  
> -- 
> 2.21.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Maarten Lankhorst May 13, 2019, 10:58 a.m. UTC | #5
Op 03-05-2019 om 21:08 schreef Ville Syrjala:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
>
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
>
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
>
> TODO: make it robust and deal with all the open questions
>
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>     If the system has 2133MT/s memory then we pointlessly
>     wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>     that using hardcoded numbers
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
Splitting out the HW readout would be nice, it would make it easier to review the separate parts that this patch tries to accomplish. :)
>  drivers/gpu/drm/i915/Makefile             |   1 +
>  drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h           |  10 +
>  drivers/gpu/drm/i915/i915_reg.h           |   3 +
>  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>  drivers/gpu/drm/i915/intel_drv.h          |   2 +
>  10 files changed, 533 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>  	  intel_atomic.o \
>  	  intel_atomic_plane.o \
>  	  intel_bios.o \
> +	  intel_bw.o \
>  	  intel_cdclk.o \
>  	  intel_color.o \
>  	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>  #include "intel_overlay.h"
>  #include "intel_pipe_crc.h"
>  #include "intel_pm.h"
> +#include "intel_sideband.h"
>  #include "intel_sprite.h"
>  #include "intel_uc.h"
>  
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}
> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};
> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp = &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}
> +
>  static void
>  intel_get_dram_info(struct drm_i915_private *dev_priv)
>  {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>  	 */
>  	intel_get_dram_info(dev_priv);
>  
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>  #include <drm/drm_cache.h>
>  #include <drm/drm_util.h>
>  #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>  #include <drm/drm_connector.h>
>  #include <drm/i915_mei_hdcp_interface.h>
>  
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>  		} type;
>  	} dram_info;
>  
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>  	struct i915_runtime_pm runtime_pm;
>  
>  	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>  extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>  
>  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>  #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
>  #define   GEN6_PCODE_READ_D_COMP		0x10
>  #define   GEN6_PCODE_WRITE_D_COMP		0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
>  	drm_atomic_helper_plane_destroy_state(plane, state);
>  }
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];
> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
>  					struct intel_crtc_state *new_crtc_state,
>  					const struct intel_plane_state *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>  	new_crtc_state->active_planes &= ~BIT(plane->id);
>  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>  	new_plane_state->base.visible = false;
>  
>  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>  	if (new_plane_state->base.visible || old_plane_state->base.visible)
>  		new_crtc_state->update_planes |= BIT(plane->id);
>  
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>  	return intel_plane_atomic_calc_changes(old_crtc_state,
>  					       &new_crtc_state->base,
>  					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>  
>  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state);
>  void intel_update_plane(struct intel_plane *plane,
>  			const struct intel_crtc_state *crtc_state,
>  			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not not cause bandwidth problems.
> +	 */
> +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> +					       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> +				       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state);
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state);
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate, num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +
> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>  #include "intel_acpi.h"
>  #include "intel_atomic.h"
>  #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>  #include "intel_color.h"
>  #include "intel_cdclk.h"
>  #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
>  
>  	intel_set_plane_visible(crtc_state, plane_state, false);
>  	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>  
>  	if (plane->id == PLANE_PRIMARY)
>  		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>  	struct intel_encoder *encoder;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>  	enum intel_display_power_domain domain;
>  	struct intel_plane *plane;
>  	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>  	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>  	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>  	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>  }
>  
>  /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
>  	if (!is_crtc_enabled) {
>  		plane_state->visible = visible = false;
>  		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
>  	}
>  
>  	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
>  		return ret;
>  
>  	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>  }
>  
>  static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
>  
>  	drm_mode_config_init(dev);
>  
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>  	dev->mode_config.min_width = 0;
>  	dev->mode_config.min_height = 0;
>  
> @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>  	drm_connector_list_iter_end(&conn_iter);
>  
>  	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>  		struct intel_crtc_state *crtc_state =
>  			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>  		int min_cdclk = 0;
>  
>  		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>  		dev_priv->min_voltage_level[crtc->pipe] =
>  			crtc_state->min_voltage_level;
>  
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane->base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>  		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>  
>  	struct intel_crtc_wm_state wm;
>  
> +	u32 data_rate[I915_MAX_PLANES];
> +
>  	/* Gamma mode programmed on the pipe */
>  	u32 gamma_mode;
>
Ville Syrjälä May 13, 2019, 2:13 p.m. UTC | #6
On Fri, May 10, 2019 at 05:42:09PM -0700, Matt Roper wrote:
> On Fri, May 03, 2019 at 10:08:31PM +0300, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > ICL has so many planes that it can easily exceed the maximum
> > effective memory bandwidth of the system. We must therefore check
> > that we don't exceed that limit.
> > 
> > The algorithm is very magic number heavy and lacks sufficient
> > explanation for now. We also have no sane way to query the
> > memory clock and timings, so we must rely on a combination of
> > raw readout from the memory controller and hardcoded assumptions.
> > The memory controller values obviously change as the system
> > jumps between the different SAGV points, so we try to stabilize
> > it first by disabling SAGV for the duration of the readout.
> > 
> > The utilized bandwidth is tracked via a device wide atomic
> > private object. That is actually not robust because we can't
> > afford to enforce strict global ordering between the pipes.
> > Thus I think I'll need to change this to simply chop up the
> > available bandwidth between all the active pipes. Each pipe
> > can then do whatever it wants as long as it doesn't exceed
> > its budget. That scheme will also require that we assume that
> > any number of planes could be active at any time.
> > 
> > TODO: make it robust and deal with all the open questions
> > 
> > v2: Sleep longer after disabling SAGV
> > v3: Poll for the dclk to get raised (seen it take 250ms!)
> >     If the system has 2133MT/s memory then we pointlessly
> >     wait one full second :(
> > v4: Use the new pcode interface to get the qgv points rather
> >     that using hardcoded numbers
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile             |   1 +
> >  drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_drv.h           |  10 +
> >  drivers/gpu/drm/i915/i915_reg.h           |   3 +
> >  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> >  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> >  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
> >  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> >  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> >  drivers/gpu/drm/i915/intel_drv.h          |   2 +
> >  10 files changed, 533 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> >  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 68106fe35a04..139a0fc19390 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> >  	  intel_atomic.o \
> >  	  intel_atomic_plane.o \
> >  	  intel_bios.o \
> > +	  intel_bw.o \
> >  	  intel_cdclk.o \
> >  	  intel_color.o \
> >  	  intel_combo_phy.o \
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 5ed864752c7b..b7fa7b51c2e2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -70,6 +70,7 @@
> >  #include "intel_overlay.h"
> >  #include "intel_pipe_crc.h"
> >  #include "intel_pm.h"
> > +#include "intel_sideband.h"
> >  #include "intel_sprite.h"
> >  #include "intel_uc.h"
> >  
> > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
> >  	return 0;
> >  }
> >  
> > +struct intel_qgv_point {
> > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > +};
> > +
> > +struct intel_sagv_info {
> > +	struct intel_qgv_point points[3];
> > +	u8 num_points;
> > +	u8 num_channels;
> > +	u8 t_bl;
> > +	enum intel_dram_type dram_type;
> > +};
> > +
> > +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> > +					  struct intel_sagv_info *si)
> > +{
> > +	u32 val = 0;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > +				     &val, NULL);
> > +	if (ret)
> > +		return ret;
> > +
> > +	switch (val & 0xf) {
> > +	case 0:
> > +		si->dram_type = INTEL_DRAM_DDR4;
> > +		break;
> > +	case 1:
> > +		si->dram_type = INTEL_DRAM_DDR3;
> > +		break;
> > +	case 2:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	case 3:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	default:
> > +		MISSING_CASE(val & 0xf);
> > +		break;
> > +	}
> > +
> > +	si->num_channels = (val & 0xf0) >> 4;
> > +	si->num_points = (val & 0xf00) >> 8;
> > +
> > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> > +					 struct intel_qgv_point *sp,
> > +					 int point)
> > +{
> > +	u32 val = 0, val2;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> > +				     &val, &val2);
> > +	if (ret)
> > +		return ret;
> > +
> > +	sp->dclk = val & 0xffff;
> > +	sp->t_rp = (val & 0xff0000) >> 16;
> > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > +
> > +	sp->t_rdpre = val2 & 0xff;
> > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > +
> > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > +			      struct intel_sagv_info *si)
> > +{
> > +	int i, ret;
> > +
> > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > +		si->num_points = ARRAY_SIZE(si->points);
> > +
> > +	for (i = 0; i < si->num_points; i++) {
> > +		struct intel_qgv_point *sp = &si->points[i];
> > +
> > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > +		if (ret)
> > +			return ret;
> > +
> > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> > +			      sp->t_rcd, sp->t_rc);
> > +	}
> > +
> > +	return 0;
> > +}
> 
> It might make sense to separate the pcode readout stuff into a patch of
> its own.  Aside from the si->t_bl assignment, the functions above are
> straightforward details from the pcode HAS.  If we wind up needing to
> drop the complicated algorithm below and replace it with a different
> one, the pcode readout part won't need to change.

Perhaps. OTOH it does mean having a patch that adds a bunch
of unused code.

> 
> > +
> > +static int icl_calc_bw(int dclk, int num, int den)
> > +{
> > +	/* multiples of 16.666MHz (100/6) */
> > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > +}
> > +
> > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > +{
> > +	u16 dclk = 0;
> > +	int i;
> > +
> > +	for (i = 0; i < si->num_points; i++)
> > +		dclk = max(dclk, si->points[i].dclk);
> > +
> > +	return dclk;
> > +}
> > +
> > +struct intel_sa_info {
> > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > +};
> > +
> > +static const struct intel_sa_info icl_sa_info = {
> > +	.deburst = 8,
> > +	.mpagesize = 16,
> > +	.deprogbwlimit = 25, /* GB/s */
> > +	.displayrtids = 128,
> > +};
> > +
> > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_sagv_info si = {};
> > +	const struct intel_sa_info *sa = &icl_sa_info;
> > +	bool is_y_tile = true; /* assume y tile may be used */
> > +	int num_channels;
> > +	int deinterleave;
> > +	int ipqdepth, ipqdepthpch;
> > +	int dclk_max;
> > +	int maxdebw;
> > +	int i, ret;
> > +
> > +	ret = icl_get_qgv_points(dev_priv, &si);
> > +	if (ret)
> > +		return ret;
> > +	num_channels = si.num_channels;
> > +
> > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > +	dclk_max = icl_sagv_max_dclk(&si);
> > +
> > +	ipqdepthpch = 16;
> > +
> > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > +		int clpchgroup;
> > +		int j;
> > +
> > +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> > +
> > +		for (j = 0; j < si.num_points; j++) {
> > +			const struct intel_qgv_point *sp = &si.points[j];
> > +			int ct, bw;
> > +
> > +			/*
> > +			 * Max row cycle time
> > +			 *
> > +			 * FIXME what is the logic behind the
> > +			 * assumed burst length?
> > +			 */
> > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> > +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> > +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> 
> The HAS document uses *64 instead of *32 for the BW calculation here.
> Are we doubling the value somewhere else that I'm overlooking?

It was 32 originally, then was updated to 64, and the dropped back to
32 because 64 caused everything to get doubled from what it was supposed
to be. The reason was that pcode is giving doubled values to what the 64
version of the algorithm was expecting.

> 
> 
> > +
> > +			bi->deratedbw[j] = min(maxdebw,
> > +					       bw * 9 / 10); /* 90% */
> > +
> > +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> > +				      i, j, bi->num_planes, bi->deratedbw[j]);
> > +		}
> > +
> > +		if (bi->num_planes == 1)
> > +			break;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> > +			       int num_planes, int qgv_point)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		const struct intel_bw_info *bi =
> > +			&dev_priv->max_bw[i];
> > +
> > +		if (num_planes >= bi->num_planes)
> > +			return bi->deratedbw[qgv_point];
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > +				 int num_planes)
> > +{
> > +	if (IS_ICELAKE(dev_priv))
> > +		/*
> > +		 * FIXME with SAGV disabled maybe we can assume
> > +		 * point 1 will always be used? Seems to match
> > +		 * the behaviour observed in the wild.
> > +		 */
> > +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> > +			    icl_max_bw(dev_priv, num_planes, 1),
> > +			    icl_max_bw(dev_priv, num_planes, 2));
> > +	else
> > +		return UINT_MAX;
> > +}
> 
> Any specific reason reason some of these functions are in i915_drv.c?
> Seems like they could just go in the new intel_bw.c.

They're here mostly because the DRAM readout was here. But yeah, should
probably move it all to intel_bw.c.

> 
> > +
> >  static void
> >  intel_get_dram_info(struct drm_i915_private *dev_priv)
> >  {
> > @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
> >  	 */
> >  	intel_get_dram_info(dev_priv);
> >  
> > +	if (INTEL_GEN(dev_priv) >= 11)
> > +		icl_get_bw_info(dev_priv);
> >  
> >  	return 0;
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 64fa353a62bb..d1b9c3fe5802 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -54,6 +54,7 @@
> >  #include <drm/drm_cache.h>
> >  #include <drm/drm_util.h>
> >  #include <drm/drm_dsc.h>
> > +#include <drm/drm_atomic.h>
> >  #include <drm/drm_connector.h>
> >  #include <drm/i915_mei_hdcp_interface.h>
> >  
> > @@ -1837,6 +1838,13 @@ struct drm_i915_private {
> >  		} type;
> >  	} dram_info;
> >  
> > +	struct intel_bw_info {
> > +		int num_planes;
> > +		int deratedbw[3];
> > +	} max_bw[6];
> > +
> > +	struct drm_private_obj bw_obj;
> > +
> >  	struct i915_runtime_pm runtime_pm;
> >  
> >  	struct {
> > @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
> >  extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
> >  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
> >  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > +				 int num_planes);
> >  
> >  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index e97c47fca645..399366a41524 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -8774,6 +8774,9 @@ enum {
> >  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
> >  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
> >  #define   GEN6_READ_OC_PARAMS			0xc
> > +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> > +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> > +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
> >  #define   GEN6_PCODE_READ_D_COMP		0x10
> >  #define   GEN6_PCODE_WRITE_D_COMP		0x11
> >  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > index d11681d71add..f142c5c22d7e 100644
> > --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
> >  	drm_atomic_helper_plane_destroy_state(plane, state);
> >  }
> >  
> > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > +				   const struct intel_plane_state *plane_state)
> > +{
> > +	const struct drm_framebuffer *fb = plane_state->base.fb;
> > +	unsigned int cpp = 0;
> > +	int i;
> > +
> > +	if (!plane_state->base.visible)
> > +		return 0;
> > +
> > +	for (i = 0; i < fb->format->num_planes; i++)
> > +		cpp += fb->format->cpp[i];
> 
> Will this handle NV12 properly?  This will give us 1+2, but the
> algorithm document indicates "NV12 should be considered as 4 bytes per
> pixel."

Hmm. That is a bit of a strange way to handle NV12. Probably need to ask
why that is the recommendation.

> 
> > +
> > +	return cpp * crtc_state->pixel_rate;
> > +}
> > +
> >  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
> >  					struct intel_crtc_state *new_crtc_state,
> >  					const struct intel_plane_state *old_plane_state,
> > @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> >  	new_crtc_state->active_planes &= ~BIT(plane->id);
> >  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
> >  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> > +	new_crtc_state->data_rate[plane->id] = 0;
> >  	new_plane_state->base.visible = false;
> >  
> >  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> > @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> >  	if (new_plane_state->base.visible || old_plane_state->base.visible)
> >  		new_crtc_state->update_planes |= BIT(plane->id);
> >  
> > +	new_crtc_state->data_rate[plane->id] =
> > +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> > +
> >  	return intel_plane_atomic_calc_changes(old_crtc_state,
> >  					       &new_crtc_state->base,
> >  					       old_plane_state,
> > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > index 14678620440f..0a9651376d0e 100644
> > --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > @@ -15,6 +15,8 @@ struct intel_plane_state;
> >  
> >  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
> >  
> > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > +				   const struct intel_plane_state *plane_state);
> >  void intel_update_plane(struct intel_plane *plane,
> >  			const struct intel_crtc_state *crtc_state,
> >  			const struct intel_plane_state *plane_state);
> > diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> > new file mode 100644
> > index 000000000000..304bf87f0a2e
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_bw.c
> > @@ -0,0 +1,181 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#include <drm/drm_atomic_state_helper.h>
> > +
> > +#include "intel_bw.h"
> > +#include "intel_drv.h"
> > +
> > +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> > +{
> > +	/*
> > +	 * We assume cursors are small enough
> > +	 * to not not cause bandwidth problems.
> > +	 */
> > +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> 
> Do we also need to account for NV12 slave planes?  IIRC, they're not set
> in the active_planes bitmask, but they're still reading from DRAM, so I
> imagine they'd count toward the calculations here?

They should be part of active_planes.

> 
> > +}
> > +
> > +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +	unsigned int data_rate = 0;
> > +	enum plane_id plane_id;
> > +
> > +	for_each_plane_id_on_crtc(crtc, plane_id) {
> > +		/*
> > +		 * We assume cursors are small enough
> > +		 * to not not cause bandwidth problems.
> > +		 */
> > +		if (plane_id == PLANE_CURSOR)
> > +			continue;
> > +
> > +		data_rate += crtc_state->data_rate[plane_id];
> > +	}
> > +
> > +	return data_rate;
> > +}
> > +
> > +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> > +			  const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +
> > +	bw_state->data_rate[crtc->pipe] =
> > +		intel_bw_crtc_data_rate(crtc_state);
> > +	bw_state->num_active_planes[crtc->pipe] =
> > +		intel_bw_crtc_num_active_planes(crtc_state);
> > +
> > +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > +		      pipe_name(crtc->pipe),
> > +		      bw_state->data_rate[crtc->pipe],
> > +		      bw_state->num_active_planes[crtc->pipe]);
> > +}
> > +
> > +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> > +					       const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int num_active_planes = 0;
> > +	enum pipe pipe;
> > +
> > +	for_each_pipe(dev_priv, pipe)
> > +		num_active_planes += bw_state->num_active_planes[pipe];
> > +
> > +	return num_active_planes;
> > +}
> > +
> > +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> > +				       const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int data_rate = 0;
> > +	enum pipe pipe;
> > +
> > +	for_each_pipe(dev_priv, pipe)
> > +		data_rate += bw_state->data_rate[pipe];
> > +
> > +	return data_rate;
> > +}
> > +
> > +int intel_bw_atomic_check(struct intel_atomic_state *state)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> > +	struct intel_bw_state *bw_state = NULL;
> > +	unsigned int data_rate, max_data_rate;
> > +	unsigned int num_active_planes;
> > +	struct intel_crtc *crtc;
> > +	int i;
> > +
> > +	/* FIXME earlier gens need some checks too */
> > +	if (INTEL_GEN(dev_priv) < 11)
> > +		return 0;
> > +
> > +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> > +					    new_crtc_state, i) {
> > +		unsigned int old_data_rate =
> > +			intel_bw_crtc_data_rate(old_crtc_state);
> > +		unsigned int new_data_rate =
> > +			intel_bw_crtc_data_rate(new_crtc_state);
> > +		unsigned int old_active_planes =
> > +			intel_bw_crtc_num_active_planes(old_crtc_state);
> > +		unsigned int new_active_planes =
> > +			intel_bw_crtc_num_active_planes(new_crtc_state);
> > +
> > +		/*
> > +		 * Avoid locking the bw state when
> > +		 * nothing significant has changed.
> > +		 */
> > +		if (old_data_rate == new_data_rate &&
> > +		    old_active_planes == new_active_planes)
> > +			continue;
> > +
> > +		bw_state  = intel_atomic_get_bw_state(state);
> > +		if (IS_ERR(bw_state))
> > +			return PTR_ERR(bw_state);
> > +
> > +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> > +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> > +
> > +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > +			      pipe_name(crtc->pipe),
> > +			      bw_state->data_rate[crtc->pipe],
> > +			      bw_state->num_active_planes[crtc->pipe]);
> > +	}
> > +
> > +	if (!bw_state)
> > +		return 0;
> > +
> > +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> > +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> > +
> > +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> > +
> > +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> > +
> > +	if (data_rate > max_data_rate) {
> > +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> > +			      data_rate, max_data_rate, num_active_planes);
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> > +{
> > +	struct intel_bw_state *state;
> > +
> > +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> > +	if (!state)
> > +		return NULL;
> > +
> > +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> > +
> > +	return &state->base;
> > +}
> > +
> > +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> > +				   struct drm_private_state *state)
> > +{
> > +	kfree(state);
> > +}
> > +
> > +static const struct drm_private_state_funcs intel_bw_funcs = {
> > +	.atomic_duplicate_state = intel_bw_duplicate_state,
> > +	.atomic_destroy_state = intel_bw_destroy_state,
> > +};
> > +
> > +int intel_bw_init(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_bw_state *state;
> > +
> > +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> > +	if (!state)
> > +		return -ENOMEM;
> > +
> > +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> > +				    &state->base, &intel_bw_funcs);
> > +
> > +	return 0;
> > +}
> > diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> > new file mode 100644
> > index 000000000000..c14272ca5b59
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_bw.h
> > @@ -0,0 +1,46 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#ifndef __INTEL_BW_H__
> > +#define __INTEL_BW_H__
> > +
> > +#include <drm/drm_atomic.h>
> > +
> > +#include "i915_drv.h"
> > +#include "intel_display.h"
> > +
> > +struct drm_i915_private;
> > +struct intel_atomic_state;
> > +struct intel_crtc_state;
> > +
> > +struct intel_bw_state {
> > +	struct drm_private_state base;
> > +
> > +	unsigned int data_rate[I915_MAX_PIPES];
> > +	u8 num_active_planes[I915_MAX_PIPES];
> > +};
> > +
> > +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> > +
> > +static inline struct intel_bw_state *
> > +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > +	struct drm_private_state *bw_state;
> > +
> 
> Do we need to grab a lock here?  Otherwise I don't see anything
> preventing parallel commits that update disjoint sets of CRTCs 
> from exceeding the combined memory bandwidth.

Private objs have a lock embedded in them now. But even with that
it is not quite race free since we don't serialize the actual commits.
So there is still a chance of temporary excursion above the limit.

> 
> Granted, grabbing a central lock seems really painful since then plane
> updates on one CRTC may wind up blocking plane updates on another CRTC.
> But I don't know if there's an easy way around that with the algorithm
> we've been given either.  Was the mention of potentially chopping up the
> bandwidth in the commit message how you plan to address this?  I.e.,
> treat it like we do with DDB space on modesets --- since we globally
> lock everything on any modeset we can divide it up at that point and
> then intra-CRTC updates will only have to worry about their own
> allocation thereafter?  That could potentially limit plane usage more
> than is actually necessary in some cases; not sure how to weigh the
> impact of that vs the downside of stuff like adding/removing/resizing
> planes blocking block updates to unrelated CRTC's.

Yeah, that was the idea. This should avoid the aforementioned
problem of temporarily exceeding the bw limit due to arbitrary
commit order. I even started implementing this but ran into
too much code that needs rework, so I put it on hold for now.

And as you note it would likely leave some bandwidth unutilized.
I was also pondering about some kind of hybrid approach where each
pipe would still have its own allocation, but we could somehow
reallocate that dynamically without resorting to a full modeset.
So each pipe would be independent until one reaches its current
bw limit, at which point we try to steal unused bw from the other
pipes for the pipe that ran out. Ie. the global serialization
would only happen when someone is actually pushing the limits.
Ville Syrjälä May 13, 2019, 2:16 p.m. UTC | #7
On Wed, May 08, 2019 at 09:05:06PM +0000, Sripada, Radhakrishna wrote:
> On Fri, 2019-05-03 at 22:08 +0300, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > ICL has so many planes that it can easily exceed the maximum
> > effective memory bandwidth of the system. We must therefore check
> > that we don't exceed that limit.
> > 
> > The algorithm is very magic number heavy and lacks sufficient
> > explanation for now. We also have no sane way to query the
> > memory clock and timings, so we must rely on a combination of
> > raw readout from the memory controller and hardcoded assumptions.
> > The memory controller values obviously change as the system
> > jumps between the different SAGV points, so we try to stabilize
> > it first by disabling SAGV for the duration of the readout.
> > 
> > The utilized bandwidth is tracked via a device wide atomic
> > private object. That is actually not robust because we can't
> > afford to enforce strict global ordering between the pipes.
> > Thus I think I'll need to change this to simply chop up the
> > available bandwidth between all the active pipes. Each pipe
> > can then do whatever it wants as long as it doesn't exceed
> > its budget. That scheme will also require that we assume that
> > any number of planes could be active at any time.
> > 
> > TODO: make it robust and deal with all the open questions
> > 
> > v2: Sleep longer after disabling SAGV
> > v3: Poll for the dclk to get raised (seen it take 250ms!)
> >     If the system has 2133MT/s memory then we pointlessly
> >     wait one full second :(
> > v4: Use the new pcode interface to get the qgv points rather
> >     that using hardcoded numbers
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile             |   1 +
> >  drivers/gpu/drm/i915/i915_drv.c           | 229
> > ++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_drv.h           |  10 +
> >  drivers/gpu/drm/i915/i915_reg.h           |   3 +
> >  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> >  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> >  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
> >  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> >  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> >  drivers/gpu/drm/i915/intel_drv.h          |   2 +
> >  10 files changed, 533 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> >  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile
> > b/drivers/gpu/drm/i915/Makefile
> > index 68106fe35a04..139a0fc19390 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> >  	  intel_atomic.o \
> >  	  intel_atomic_plane.o \
> >  	  intel_bios.o \
> > +	  intel_bw.o \
> >  	  intel_cdclk.o \
> >  	  intel_color.o \
> >  	  intel_combo_phy.o \
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> > b/drivers/gpu/drm/i915/i915_drv.c
> > index 5ed864752c7b..b7fa7b51c2e2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -70,6 +70,7 @@
> >  #include "intel_overlay.h"
> >  #include "intel_pipe_crc.h"
> >  #include "intel_pm.h"
> > +#include "intel_sideband.h"
> >  #include "intel_sprite.h"
> >  #include "intel_uc.h"
> >  
> > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private
> > *dev_priv)
> >  	return 0;
> >  }
> >  
> > +struct intel_qgv_point {
> > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > +};
> > +
> > +struct intel_sagv_info {
> > +	struct intel_qgv_point points[3];
> > +	u8 num_points;
> > +	u8 num_channels;
> > +	u8 t_bl;
> > +	enum intel_dram_type dram_type;
> > +};
> > +
> > +static int icl_pcode_read_mem_global_info(struct drm_i915_private
> > *dev_priv,
> > +					  struct intel_sagv_info *si)
> > +{
> > +	u32 val = 0;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > +				     &val, NULL);
> > +	if (ret)
> > +		return ret;
> > +
> > +	switch (val & 0xf) {
> > +	case 0:
> > +		si->dram_type = INTEL_DRAM_DDR4;
> > +		break;
> > +	case 1:
> > +		si->dram_type = INTEL_DRAM_DDR3;
> > +		break;
> > +	case 2:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	case 3:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	default:
> > +		MISSING_CASE(val & 0xf);
> > +		break;
> > +	}
> > +
> > +	si->num_channels = (val & 0xf0) >> 4;
> > +	si->num_points = (val & 0xf00) >> 8;
> > +
> > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private
> > *dev_priv,
> > +					 struct intel_qgv_point *sp,
> > +					 int point)
> Are we trying to retrieve the dram timing parameters to calculate the
> latency? If so can that be seperated as latency calculation instead of
> using it under bw info below?
> > +{
> > +	u32 val = 0, val2;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_IN
> > FO(point),
> > +				     &val, &val2);
> > +	if (ret)
> > +		return ret;
> > +
> > +	sp->dclk = val & 0xffff;
> > +	sp->t_rp = (val & 0xff0000) >> 16;
> > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > +
> > +	sp->t_rdpre = val2 & 0xff;
> > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > +
> > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > +			      struct intel_sagv_info *si)
> > +{
> > +	int i, ret;
> > +
> > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > +		si->num_points = ARRAY_SIZE(si->points);
> > +
> > +	for (i = 0; i < si->num_points; i++) {
> > +		struct intel_qgv_point *sp = &si->points[i];
> > +
> > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > +		if (ret)
> > +			return ret;
> > +
> > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d
> > tRCD=%d tRC=%d\n",
> > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp-
> > >t_ras,
> > +			      sp->t_rcd, sp->t_rc);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_calc_bw(int dclk, int num, int den)
> > +{
> > +	/* multiples of 16.666MHz (100/6) */
> > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > +}
> > +
> > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > +{
> > +	u16 dclk = 0;
> > +	int i;
> > +
> > +	for (i = 0; i < si->num_points; i++)
> > +		dclk = max(dclk, si->points[i].dclk);
> > +
> > +	return dclk;
> > +}
> > +
> > +struct intel_sa_info {
> > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > +};
> > +
> > +static const struct intel_sa_info icl_sa_info = {
> > +	.deburst = 8,
> > +	.mpagesize = 16,
> > +	.deprogbwlimit = 25, /* GB/s */
> > +	.displayrtids = 128,
> > +};
> > +
> > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_sagv_info si = {};
> > +	const struct intel_sa_info *sa = &icl_sa_info;
> > +	bool is_y_tile = true; /* assume y tile may be used */
> > +	int num_channels;
> > +	int deinterleave;
> > +	int ipqdepth, ipqdepthpch;
> > +	int dclk_max;
> > +	int maxdebw;
> > +	int i, ret;
> > +
> > +	ret = icl_get_qgv_points(dev_priv, &si);
> > +	if (ret)
> > +		return ret;
> > +	num_channels = si.num_channels;
> > +
> > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > +	dclk_max = icl_sagv_max_dclk(&si);
> > +
> > +	ipqdepthpch = 16;
> > +
> > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > +		int clpchgroup;
> > +		int j;
> > +
> > +		clpchgroup = (sa->deburst * deinterleave /
> > num_channels) << i;
> > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup +
> > 1;
> > +
> > +		for (j = 0; j < si.num_points; j++) {
> > +			const struct intel_qgv_point *sp =
> > &si.points[j];
> > +			int ct, bw;
> > +
> > +			/*
> > +			 * Max row cycle time
> > +			 *
> > +			 * FIXME what is the logic behind the
> > +			 * assumed burst length?
> > +			 */
> > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd
> > +
> > +				   (clpchgroup - 1) * si.t_bl + sp-
> > >t_rdpre);
> For logical flow can we move the above timing related calculations to a
> seperate function along with fixme to delink bandwidth and latency
> calculations?

I don't see what you would want to delink. This is all just
calculating the effective memory bandwidth limit.
Sripada, Radhakrishna May 14, 2019, 12:59 a.m. UTC | #8
On Mon, 2019-05-13 at 17:16 +0300, Ville Syrjälä wrote:
> On Wed, May 08, 2019 at 09:05:06PM +0000, Sripada, Radhakrishna
> wrote:
> > On Fri, 2019-05-03 at 22:08 +0300, Ville Syrjala wrote:
> > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > 
> > > ICL has so many planes that it can easily exceed the maximum
> > > effective memory bandwidth of the system. We must therefore check
> > > that we don't exceed that limit.
> > > 
> > > The algorithm is very magic number heavy and lacks sufficient
> > > explanation for now. We also have no sane way to query the
> > > memory clock and timings, so we must rely on a combination of
> > > raw readout from the memory controller and hardcoded assumptions.
> > > The memory controller values obviously change as the system
> > > jumps between the different SAGV points, so we try to stabilize
> > > it first by disabling SAGV for the duration of the readout.
> > > 
> > > The utilized bandwidth is tracked via a device wide atomic
> > > private object. That is actually not robust because we can't
> > > afford to enforce strict global ordering between the pipes.
> > > Thus I think I'll need to change this to simply chop up the
> > > available bandwidth between all the active pipes. Each pipe
> > > can then do whatever it wants as long as it doesn't exceed
> > > its budget. That scheme will also require that we assume that
> > > any number of planes could be active at any time.
> > > 
> > > TODO: make it robust and deal with all the open questions
> > > 
> > > v2: Sleep longer after disabling SAGV
> > > v3: Poll for the dclk to get raised (seen it take 250ms!)
> > >     If the system has 2133MT/s memory then we pointlessly
> > >     wait one full second :(
> > > v4: Use the new pcode interface to get the qgv points rather
> > >     that using hardcoded numbers
> > > 
> > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/Makefile             |   1 +
> > >  drivers/gpu/drm/i915/i915_drv.c           | 229
> > > ++++++++++++++++++++++
> > >  drivers/gpu/drm/i915/i915_drv.h           |  10 +
> > >  drivers/gpu/drm/i915/i915_reg.h           |   3 +
> > >  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> > >  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> > >  drivers/gpu/drm/i915/intel_bw.c           | 181
> > > +++++++++++++++++
> > >  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> > >  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> > >  drivers/gpu/drm/i915/intel_drv.h          |   2 +
> > >  10 files changed, 533 insertions(+), 1 deletion(-)
> > >  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> > >  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> > > 
> > > diff --git a/drivers/gpu/drm/i915/Makefile
> > > b/drivers/gpu/drm/i915/Makefile
> > > index 68106fe35a04..139a0fc19390 100644
> > > --- a/drivers/gpu/drm/i915/Makefile
> > > +++ b/drivers/gpu/drm/i915/Makefile
> > > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> > >  	  intel_atomic.o \
> > >  	  intel_atomic_plane.o \
> > >  	  intel_bios.o \
> > > +	  intel_bw.o \
> > >  	  intel_cdclk.o \
> > >  	  intel_color.o \
> > >  	  intel_combo_phy.o \
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> > > b/drivers/gpu/drm/i915/i915_drv.c
> > > index 5ed864752c7b..b7fa7b51c2e2 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.c
> > > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > > @@ -70,6 +70,7 @@
> > >  #include "intel_overlay.h"
> > >  #include "intel_pipe_crc.h"
> > >  #include "intel_pm.h"
> > > +#include "intel_sideband.h"
> > >  #include "intel_sprite.h"
> > >  #include "intel_uc.h"
> > >  
> > > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private
> > > *dev_priv)
> > >  	return 0;
> > >  }
> > >  
> > > +struct intel_qgv_point {
> > > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > > +};
> > > +
> > > +struct intel_sagv_info {
> > > +	struct intel_qgv_point points[3];
> > > +	u8 num_points;
> > > +	u8 num_channels;
> > > +	u8 t_bl;
> > > +	enum intel_dram_type dram_type;
> > > +};
> > > +
> > > +static int icl_pcode_read_mem_global_info(struct
> > > drm_i915_private
> > > *dev_priv,
> > > +					  struct intel_sagv_info *si)
> > > +{
> > > +	u32 val = 0;
> > > +	int ret;
> > > +
> > > +	ret = sandybridge_pcode_read(dev_priv,
> > > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > > +				     &val, NULL);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	switch (val & 0xf) {
> > > +	case 0:
> > > +		si->dram_type = INTEL_DRAM_DDR4;
> > > +		break;
> > > +	case 1:
> > > +		si->dram_type = INTEL_DRAM_DDR3;
> > > +		break;
> > > +	case 2:
> > > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > > +		break;
> > > +	case 3:
> > > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > > +		break;
> > > +	default:
> > > +		MISSING_CASE(val & 0xf);
> > > +		break;
> > > +	}
> > > +
> > > +	si->num_channels = (val & 0xf0) >> 4;
> > > +	si->num_points = (val & 0xf00) >> 8;
> > > +
> > > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private
> > > *dev_priv,
> > > +					 struct intel_qgv_point *sp,
> > > +					 int point)
> > Are we trying to retrieve the dram timing parameters to calculate
> > the
> > latency? If so can that be seperated as latency calculation instead
> > of
> > using it under bw info below?
> > > +{
> > > +	u32 val = 0, val2;
> > > +	int ret;
> > > +
> > > +	ret = sandybridge_pcode_read(dev_priv,
> > > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_IN
> > > FO(point),
> > > +				     &val, &val2);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	sp->dclk = val & 0xffff;
> > > +	sp->t_rp = (val & 0xff0000) >> 16;
> > > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > > +
> > > +	sp->t_rdpre = val2 & 0xff;
> > > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > > +
> > > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > > +			      struct intel_sagv_info *si)
> > > +{
> > > +	int i, ret;
> > > +
> > > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > > +		si->num_points = ARRAY_SIZE(si->points);
> > > +
> > > +	for (i = 0; i < si->num_points; i++) {
> > > +		struct intel_qgv_point *sp = &si->points[i];
> > > +
> > > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > > +		if (ret)
> > > +			return ret;
> > > +
> > > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d
> > > tRCD=%d tRC=%d\n",
> > > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp-
> > > > t_ras,
> > > +			      sp->t_rcd, sp->t_rc);
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_calc_bw(int dclk, int num, int den)
> > > +{
> > > +	/* multiples of 16.666MHz (100/6) */
> > > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > > +}
> > > +
> > > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > > +{
> > > +	u16 dclk = 0;
> > > +	int i;
> > > +
> > > +	for (i = 0; i < si->num_points; i++)
> > > +		dclk = max(dclk, si->points[i].dclk);
> > > +
> > > +	return dclk;
> > > +}
> > > +
> > > +struct intel_sa_info {
> > > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > > +};
> > > +
> > > +static const struct intel_sa_info icl_sa_info = {
> > > +	.deburst = 8,
> > > +	.mpagesize = 16,
> > > +	.deprogbwlimit = 25, /* GB/s */
> > > +	.displayrtids = 128,
> > > +};
> > > +
> > > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > > +{
> > > +	struct intel_sagv_info si = {};
> > > +	const struct intel_sa_info *sa = &icl_sa_info;
> > > +	bool is_y_tile = true; /* assume y tile may be used */
> > > +	int num_channels;
> > > +	int deinterleave;
> > > +	int ipqdepth, ipqdepthpch;
> > > +	int dclk_max;
> > > +	int maxdebw;
> > > +	int i, ret;
> > > +
> > > +	ret = icl_get_qgv_points(dev_priv, &si);
> > > +	if (ret)
> > > +		return ret;
> > > +	num_channels = si.num_channels;
> > > +
> > > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > > +	dclk_max = icl_sagv_max_dclk(&si);
> > > +
> > > +	ipqdepthpch = 16;
> > > +
> > > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > > +
> > > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > > +		int clpchgroup;
> > > +		int j;
> > > +
> > > +		clpchgroup = (sa->deburst * deinterleave /
> > > num_channels) << i;
> > > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup +
> > > 1;
> > > +
> > > +		for (j = 0; j < si.num_points; j++) {
> > > +			const struct intel_qgv_point *sp =
> > > &si.points[j];
> > > +			int ct, bw;
> > > +
> > > +			/*
> > > +			 * Max row cycle time
> > > +			 *
> > > +			 * FIXME what is the logic behind the
> > > +			 * assumed burst length?
> > > +			 */
> > > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd
> > > +
> > > +				   (clpchgroup - 1) * si.t_bl + sp-
> > > > t_rdpre);
> > For logical flow can we move the above timing related calculations
> > to a
> > seperate function along with fixme to delink bandwidth and latency
> > calculations?
> 
> I don't see what you would want to delink. This is all just
> calculating the effective memory bandwidth limit.
I am assuming the variables t_rc, t_rp, t_rcd are related to dram
latency/timing information. I was wondering if this latency calculation
is moved to a seperate inline function for readability purposes.

- Radhakrishna(RK) Sripada 
>
Matt Roper May 17, 2019, 6:03 p.m. UTC | #9
On Mon, May 13, 2019 at 05:13:10PM +0300, Ville Syrjälä wrote:
> On Fri, May 10, 2019 at 05:42:09PM -0700, Matt Roper wrote:
> > On Fri, May 03, 2019 at 10:08:31PM +0300, Ville Syrjala wrote:
> > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > 
> > > ICL has so many planes that it can easily exceed the maximum
> > > effective memory bandwidth of the system. We must therefore check
> > > that we don't exceed that limit.
> > > 
> > > The algorithm is very magic number heavy and lacks sufficient
> > > explanation for now. We also have no sane way to query the
> > > memory clock and timings, so we must rely on a combination of
> > > raw readout from the memory controller and hardcoded assumptions.
> > > The memory controller values obviously change as the system
> > > jumps between the different SAGV points, so we try to stabilize
> > > it first by disabling SAGV for the duration of the readout.
> > > 
> > > The utilized bandwidth is tracked via a device wide atomic
> > > private object. That is actually not robust because we can't
> > > afford to enforce strict global ordering between the pipes.
> > > Thus I think I'll need to change this to simply chop up the
> > > available bandwidth between all the active pipes. Each pipe
> > > can then do whatever it wants as long as it doesn't exceed
> > > its budget. That scheme will also require that we assume that
> > > any number of planes could be active at any time.
> > > 
> > > TODO: make it robust and deal with all the open questions
> > > 
> > > v2: Sleep longer after disabling SAGV
> > > v3: Poll for the dclk to get raised (seen it take 250ms!)
> > >     If the system has 2133MT/s memory then we pointlessly
> > >     wait one full second :(
> > > v4: Use the new pcode interface to get the qgv points rather
> > >     that using hardcoded numbers
> > > 
> > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/Makefile             |   1 +
> > >  drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
> > >  drivers/gpu/drm/i915/i915_drv.h           |  10 +
> > >  drivers/gpu/drm/i915/i915_reg.h           |   3 +
> > >  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> > >  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> > >  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
> > >  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> > >  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> > >  drivers/gpu/drm/i915/intel_drv.h          |   2 +
> > >  10 files changed, 533 insertions(+), 1 deletion(-)
> > >  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> > >  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> > > 
> > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > > index 68106fe35a04..139a0fc19390 100644
> > > --- a/drivers/gpu/drm/i915/Makefile
> > > +++ b/drivers/gpu/drm/i915/Makefile
> > > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> > >  	  intel_atomic.o \
> > >  	  intel_atomic_plane.o \
> > >  	  intel_bios.o \
> > > +	  intel_bw.o \
> > >  	  intel_cdclk.o \
> > >  	  intel_color.o \
> > >  	  intel_combo_phy.o \
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > > index 5ed864752c7b..b7fa7b51c2e2 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.c
> > > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > > @@ -70,6 +70,7 @@
> > >  #include "intel_overlay.h"
> > >  #include "intel_pipe_crc.h"
> > >  #include "intel_pm.h"
> > > +#include "intel_sideband.h"
> > >  #include "intel_sprite.h"
> > >  #include "intel_uc.h"
> > >  
> > > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
> > >  	return 0;
> > >  }
> > >  
> > > +struct intel_qgv_point {
> > > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > > +};
> > > +
> > > +struct intel_sagv_info {
> > > +	struct intel_qgv_point points[3];
> > > +	u8 num_points;
> > > +	u8 num_channels;
> > > +	u8 t_bl;
> > > +	enum intel_dram_type dram_type;
> > > +};
> > > +
> > > +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> > > +					  struct intel_sagv_info *si)
> > > +{
> > > +	u32 val = 0;
> > > +	int ret;
> > > +
> > > +	ret = sandybridge_pcode_read(dev_priv,
> > > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > > +				     &val, NULL);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	switch (val & 0xf) {
> > > +	case 0:
> > > +		si->dram_type = INTEL_DRAM_DDR4;
> > > +		break;
> > > +	case 1:
> > > +		si->dram_type = INTEL_DRAM_DDR3;
> > > +		break;
> > > +	case 2:
> > > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > > +		break;
> > > +	case 3:
> > > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > > +		break;
> > > +	default:
> > > +		MISSING_CASE(val & 0xf);
> > > +		break;
> > > +	}
> > > +
> > > +	si->num_channels = (val & 0xf0) >> 4;
> > > +	si->num_points = (val & 0xf00) >> 8;
> > > +
> > > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> > > +					 struct intel_qgv_point *sp,
> > > +					 int point)
> > > +{
> > > +	u32 val = 0, val2;
> > > +	int ret;
> > > +
> > > +	ret = sandybridge_pcode_read(dev_priv,
> > > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> > > +				     &val, &val2);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	sp->dclk = val & 0xffff;
> > > +	sp->t_rp = (val & 0xff0000) >> 16;
> > > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > > +
> > > +	sp->t_rdpre = val2 & 0xff;
> > > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > > +
> > > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > > +			      struct intel_sagv_info *si)
> > > +{
> > > +	int i, ret;
> > > +
> > > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > > +		si->num_points = ARRAY_SIZE(si->points);
> > > +
> > > +	for (i = 0; i < si->num_points; i++) {
> > > +		struct intel_qgv_point *sp = &si->points[i];
> > > +
> > > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > > +		if (ret)
> > > +			return ret;
> > > +
> > > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> > > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> > > +			      sp->t_rcd, sp->t_rc);
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > 
> > It might make sense to separate the pcode readout stuff into a patch of
> > its own.  Aside from the si->t_bl assignment, the functions above are
> > straightforward details from the pcode HAS.  If we wind up needing to
> > drop the complicated algorithm below and replace it with a different
> > one, the pcode readout part won't need to change.
> 
> Perhaps. OTOH it does mean having a patch that adds a bunch
> of unused code.
> 
> > 
> > > +
> > > +static int icl_calc_bw(int dclk, int num, int den)
> > > +{
> > > +	/* multiples of 16.666MHz (100/6) */
> > > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > > +}
> > > +
> > > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > > +{
> > > +	u16 dclk = 0;
> > > +	int i;
> > > +
> > > +	for (i = 0; i < si->num_points; i++)
> > > +		dclk = max(dclk, si->points[i].dclk);
> > > +
> > > +	return dclk;
> > > +}
> > > +
> > > +struct intel_sa_info {
> > > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > > +};
> > > +
> > > +static const struct intel_sa_info icl_sa_info = {
> > > +	.deburst = 8,
> > > +	.mpagesize = 16,
> > > +	.deprogbwlimit = 25, /* GB/s */
> > > +	.displayrtids = 128,
> > > +};
> > > +
> > > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > > +{
> > > +	struct intel_sagv_info si = {};
> > > +	const struct intel_sa_info *sa = &icl_sa_info;
> > > +	bool is_y_tile = true; /* assume y tile may be used */
> > > +	int num_channels;
> > > +	int deinterleave;
> > > +	int ipqdepth, ipqdepthpch;
> > > +	int dclk_max;
> > > +	int maxdebw;
> > > +	int i, ret;
> > > +
> > > +	ret = icl_get_qgv_points(dev_priv, &si);
> > > +	if (ret)
> > > +		return ret;
> > > +	num_channels = si.num_channels;
> > > +
> > > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > > +	dclk_max = icl_sagv_max_dclk(&si);
> > > +
> > > +	ipqdepthpch = 16;
> > > +
> > > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > > +
> > > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > > +		int clpchgroup;
> > > +		int j;
> > > +
> > > +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> > > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> > > +
> > > +		for (j = 0; j < si.num_points; j++) {
> > > +			const struct intel_qgv_point *sp = &si.points[j];
> > > +			int ct, bw;
> > > +
> > > +			/*
> > > +			 * Max row cycle time
> > > +			 *
> > > +			 * FIXME what is the logic behind the
> > > +			 * assumed burst length?
> > > +			 */
> > > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> > > +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> > > +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> > 
> > The HAS document uses *64 instead of *32 for the BW calculation here.
> > Are we doubling the value somewhere else that I'm overlooking?
> 
> It was 32 originally, then was updated to 64, and the dropped back to
> 32 because 64 caused everything to get doubled from what it was supposed
> to be. The reason was that pcode is giving doubled values to what the 64
> version of the algorithm was expecting.
> 
> > 
> > 
> > > +
> > > +			bi->deratedbw[j] = min(maxdebw,
> > > +					       bw * 9 / 10); /* 90% */
> > > +
> > > +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> > > +				      i, j, bi->num_planes, bi->deratedbw[j]);
> > > +		}
> > > +
> > > +		if (bi->num_planes == 1)
> > > +			break;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> > > +			       int num_planes, int qgv_point)
> > > +{
> > > +	int i;
> > > +
> > > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > > +		const struct intel_bw_info *bi =
> > > +			&dev_priv->max_bw[i];
> > > +
> > > +		if (num_planes >= bi->num_planes)
> > > +			return bi->deratedbw[qgv_point];
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > > +				 int num_planes)
> > > +{
> > > +	if (IS_ICELAKE(dev_priv))
> > > +		/*
> > > +		 * FIXME with SAGV disabled maybe we can assume
> > > +		 * point 1 will always be used? Seems to match
> > > +		 * the behaviour observed in the wild.
> > > +		 */
> > > +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> > > +			    icl_max_bw(dev_priv, num_planes, 1),
> > > +			    icl_max_bw(dev_priv, num_planes, 2));
> > > +	else
> > > +		return UINT_MAX;
> > > +}
> > 
> > Any specific reason reason some of these functions are in i915_drv.c?
> > Seems like they could just go in the new intel_bw.c.
> 
> They're here mostly because the DRAM readout was here. But yeah, should
> probably move it all to intel_bw.c.
> 
> > 
> > > +
> > >  static void
> > >  intel_get_dram_info(struct drm_i915_private *dev_priv)
> > >  {
> > > @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
> > >  	 */
> > >  	intel_get_dram_info(dev_priv);
> > >  
> > > +	if (INTEL_GEN(dev_priv) >= 11)
> > > +		icl_get_bw_info(dev_priv);
> > >  
> > >  	return 0;
> > >  
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 64fa353a62bb..d1b9c3fe5802 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -54,6 +54,7 @@
> > >  #include <drm/drm_cache.h>
> > >  #include <drm/drm_util.h>
> > >  #include <drm/drm_dsc.h>
> > > +#include <drm/drm_atomic.h>
> > >  #include <drm/drm_connector.h>
> > >  #include <drm/i915_mei_hdcp_interface.h>
> > >  
> > > @@ -1837,6 +1838,13 @@ struct drm_i915_private {
> > >  		} type;
> > >  	} dram_info;
> > >  
> > > +	struct intel_bw_info {
> > > +		int num_planes;
> > > +		int deratedbw[3];
> > > +	} max_bw[6];
> > > +
> > > +	struct drm_private_obj bw_obj;
> > > +
> > >  	struct i915_runtime_pm runtime_pm;
> > >  
> > >  	struct {
> > > @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
> > >  extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
> > >  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
> > >  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> > > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > > +				 int num_planes);
> > >  
> > >  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
> > >  
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > > index e97c47fca645..399366a41524 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -8774,6 +8774,9 @@ enum {
> > >  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
> > >  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
> > >  #define   GEN6_READ_OC_PARAMS			0xc
> > > +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> > > +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> > > +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
> > >  #define   GEN6_PCODE_READ_D_COMP		0x10
> > >  #define   GEN6_PCODE_WRITE_D_COMP		0x11
> > >  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> > > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > > index d11681d71add..f142c5c22d7e 100644
> > > --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> > > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > > @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
> > >  	drm_atomic_helper_plane_destroy_state(plane, state);
> > >  }
> > >  
> > > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > > +				   const struct intel_plane_state *plane_state)
> > > +{
> > > +	const struct drm_framebuffer *fb = plane_state->base.fb;
> > > +	unsigned int cpp = 0;
> > > +	int i;
> > > +
> > > +	if (!plane_state->base.visible)
> > > +		return 0;
> > > +
> > > +	for (i = 0; i < fb->format->num_planes; i++)
> > > +		cpp += fb->format->cpp[i];
> > 
> > Will this handle NV12 properly?  This will give us 1+2, but the
> > algorithm document indicates "NV12 should be considered as 4 bytes per
> > pixel."
> 
> Hmm. That is a bit of a strange way to handle NV12. Probably need to ask
> why that is the recommendation.

Any update on this?  I agree that the recommendation to consider it 4
bytes seems odd from a software point of view, but I'd like to get
confirmation from whoever came up with the algorithm as to whether it's
truly important or whether they just specified that because they thought
it would be simpler somehow.

If you've already got confirmation from someone on the hardware side
that this is okay, then I think this patch is good enough for now:

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

and we can figure out something more sophisicated to do about the global
locking situation in followup patches.  Please also push on the hardware
guys to update the algorithm documentation with the outcome here and
also the 32 vs 64 thing that you clarified above.


Matt

> > 
> > > +
> > > +	return cpp * crtc_state->pixel_rate;
> > > +}
> > > +
> > >  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
> > >  					struct intel_crtc_state *new_crtc_state,
> > >  					const struct intel_plane_state *old_plane_state,
> > > @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> > >  	new_crtc_state->active_planes &= ~BIT(plane->id);
> > >  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
> > >  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> > > +	new_crtc_state->data_rate[plane->id] = 0;
> > >  	new_plane_state->base.visible = false;
> > >  
> > >  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> > > @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> > >  	if (new_plane_state->base.visible || old_plane_state->base.visible)
> > >  		new_crtc_state->update_planes |= BIT(plane->id);
> > >  
> > > +	new_crtc_state->data_rate[plane->id] =
> > > +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> > > +
> > >  	return intel_plane_atomic_calc_changes(old_crtc_state,
> > >  					       &new_crtc_state->base,
> > >  					       old_plane_state,
> > > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > > index 14678620440f..0a9651376d0e 100644
> > > --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> > > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > > @@ -15,6 +15,8 @@ struct intel_plane_state;
> > >  
> > >  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
> > >  
> > > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > > +				   const struct intel_plane_state *plane_state);
> > >  void intel_update_plane(struct intel_plane *plane,
> > >  			const struct intel_crtc_state *crtc_state,
> > >  			const struct intel_plane_state *plane_state);
> > > diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> > > new file mode 100644
> > > index 000000000000..304bf87f0a2e
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/i915/intel_bw.c
> > > @@ -0,0 +1,181 @@
> > > +// SPDX-License-Identifier: MIT
> > > +/*
> > > + * Copyright © 2019 Intel Corporation
> > > + */
> > > +
> > > +#include <drm/drm_atomic_state_helper.h>
> > > +
> > > +#include "intel_bw.h"
> > > +#include "intel_drv.h"
> > > +
> > > +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> > > +{
> > > +	/*
> > > +	 * We assume cursors are small enough
> > > +	 * to not not cause bandwidth problems.
> > > +	 */
> > > +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> > 
> > Do we also need to account for NV12 slave planes?  IIRC, they're not set
> > in the active_planes bitmask, but they're still reading from DRAM, so I
> > imagine they'd count toward the calculations here?
> 
> They should be part of active_planes.
> 
> > 
> > > +}
> > > +
> > > +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> > > +{
> > > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > > +	unsigned int data_rate = 0;
> > > +	enum plane_id plane_id;
> > > +
> > > +	for_each_plane_id_on_crtc(crtc, plane_id) {
> > > +		/*
> > > +		 * We assume cursors are small enough
> > > +		 * to not not cause bandwidth problems.
> > > +		 */
> > > +		if (plane_id == PLANE_CURSOR)
> > > +			continue;
> > > +
> > > +		data_rate += crtc_state->data_rate[plane_id];
> > > +	}
> > > +
> > > +	return data_rate;
> > > +}
> > > +
> > > +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> > > +			  const struct intel_crtc_state *crtc_state)
> > > +{
> > > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > > +
> > > +	bw_state->data_rate[crtc->pipe] =
> > > +		intel_bw_crtc_data_rate(crtc_state);
> > > +	bw_state->num_active_planes[crtc->pipe] =
> > > +		intel_bw_crtc_num_active_planes(crtc_state);
> > > +
> > > +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > > +		      pipe_name(crtc->pipe),
> > > +		      bw_state->data_rate[crtc->pipe],
> > > +		      bw_state->num_active_planes[crtc->pipe]);
> > > +}
> > > +
> > > +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> > > +					       const struct intel_bw_state *bw_state)
> > > +{
> > > +	unsigned int num_active_planes = 0;
> > > +	enum pipe pipe;
> > > +
> > > +	for_each_pipe(dev_priv, pipe)
> > > +		num_active_planes += bw_state->num_active_planes[pipe];
> > > +
> > > +	return num_active_planes;
> > > +}
> > > +
> > > +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> > > +				       const struct intel_bw_state *bw_state)
> > > +{
> > > +	unsigned int data_rate = 0;
> > > +	enum pipe pipe;
> > > +
> > > +	for_each_pipe(dev_priv, pipe)
> > > +		data_rate += bw_state->data_rate[pipe];
> > > +
> > > +	return data_rate;
> > > +}
> > > +
> > > +int intel_bw_atomic_check(struct intel_atomic_state *state)
> > > +{
> > > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > > +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> > > +	struct intel_bw_state *bw_state = NULL;
> > > +	unsigned int data_rate, max_data_rate;
> > > +	unsigned int num_active_planes;
> > > +	struct intel_crtc *crtc;
> > > +	int i;
> > > +
> > > +	/* FIXME earlier gens need some checks too */
> > > +	if (INTEL_GEN(dev_priv) < 11)
> > > +		return 0;
> > > +
> > > +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> > > +					    new_crtc_state, i) {
> > > +		unsigned int old_data_rate =
> > > +			intel_bw_crtc_data_rate(old_crtc_state);
> > > +		unsigned int new_data_rate =
> > > +			intel_bw_crtc_data_rate(new_crtc_state);
> > > +		unsigned int old_active_planes =
> > > +			intel_bw_crtc_num_active_planes(old_crtc_state);
> > > +		unsigned int new_active_planes =
> > > +			intel_bw_crtc_num_active_planes(new_crtc_state);
> > > +
> > > +		/*
> > > +		 * Avoid locking the bw state when
> > > +		 * nothing significant has changed.
> > > +		 */
> > > +		if (old_data_rate == new_data_rate &&
> > > +		    old_active_planes == new_active_planes)
> > > +			continue;
> > > +
> > > +		bw_state  = intel_atomic_get_bw_state(state);
> > > +		if (IS_ERR(bw_state))
> > > +			return PTR_ERR(bw_state);
> > > +
> > > +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> > > +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> > > +
> > > +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > > +			      pipe_name(crtc->pipe),
> > > +			      bw_state->data_rate[crtc->pipe],
> > > +			      bw_state->num_active_planes[crtc->pipe]);
> > > +	}
> > > +
> > > +	if (!bw_state)
> > > +		return 0;
> > > +
> > > +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> > > +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> > > +
> > > +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> > > +
> > > +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> > > +
> > > +	if (data_rate > max_data_rate) {
> > > +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> > > +			      data_rate, max_data_rate, num_active_planes);
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> > > +{
> > > +	struct intel_bw_state *state;
> > > +
> > > +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> > > +	if (!state)
> > > +		return NULL;
> > > +
> > > +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> > > +
> > > +	return &state->base;
> > > +}
> > > +
> > > +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> > > +				   struct drm_private_state *state)
> > > +{
> > > +	kfree(state);
> > > +}
> > > +
> > > +static const struct drm_private_state_funcs intel_bw_funcs = {
> > > +	.atomic_duplicate_state = intel_bw_duplicate_state,
> > > +	.atomic_destroy_state = intel_bw_destroy_state,
> > > +};
> > > +
> > > +int intel_bw_init(struct drm_i915_private *dev_priv)
> > > +{
> > > +	struct intel_bw_state *state;
> > > +
> > > +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> > > +	if (!state)
> > > +		return -ENOMEM;
> > > +
> > > +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> > > +				    &state->base, &intel_bw_funcs);
> > > +
> > > +	return 0;
> > > +}
> > > diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> > > new file mode 100644
> > > index 000000000000..c14272ca5b59
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/i915/intel_bw.h
> > > @@ -0,0 +1,46 @@
> > > +/* SPDX-License-Identifier: MIT */
> > > +/*
> > > + * Copyright © 2019 Intel Corporation
> > > + */
> > > +
> > > +#ifndef __INTEL_BW_H__
> > > +#define __INTEL_BW_H__
> > > +
> > > +#include <drm/drm_atomic.h>
> > > +
> > > +#include "i915_drv.h"
> > > +#include "intel_display.h"
> > > +
> > > +struct drm_i915_private;
> > > +struct intel_atomic_state;
> > > +struct intel_crtc_state;
> > > +
> > > +struct intel_bw_state {
> > > +	struct drm_private_state base;
> > > +
> > > +	unsigned int data_rate[I915_MAX_PIPES];
> > > +	u8 num_active_planes[I915_MAX_PIPES];
> > > +};
> > > +
> > > +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> > > +
> > > +static inline struct intel_bw_state *
> > > +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> > > +{
> > > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > > +	struct drm_private_state *bw_state;
> > > +
> > 
> > Do we need to grab a lock here?  Otherwise I don't see anything
> > preventing parallel commits that update disjoint sets of CRTCs 
> > from exceeding the combined memory bandwidth.
> 
> Private objs have a lock embedded in them now. But even with that
> it is not quite race free since we don't serialize the actual commits.
> So there is still a chance of temporary excursion above the limit.
> 
> > 
> > Granted, grabbing a central lock seems really painful since then plane
> > updates on one CRTC may wind up blocking plane updates on another CRTC.
> > But I don't know if there's an easy way around that with the algorithm
> > we've been given either.  Was the mention of potentially chopping up the
> > bandwidth in the commit message how you plan to address this?  I.e.,
> > treat it like we do with DDB space on modesets --- since we globally
> > lock everything on any modeset we can divide it up at that point and
> > then intra-CRTC updates will only have to worry about their own
> > allocation thereafter?  That could potentially limit plane usage more
> > than is actually necessary in some cases; not sure how to weigh the
> > impact of that vs the downside of stuff like adding/removing/resizing
> > planes blocking block updates to unrelated CRTC's.
> 
> Yeah, that was the idea. This should avoid the aforementioned
> problem of temporarily exceeding the bw limit due to arbitrary
> commit order. I even started implementing this but ran into
> too much code that needs rework, so I put it on hold for now.
> 
> And as you note it would likely leave some bandwidth unutilized.
> I was also pondering about some kind of hybrid approach where each
> pipe would still have its own allocation, but we could somehow
> reallocate that dynamically without resorting to a full modeset.
> So each pipe would be independent until one reaches its current
> bw limit, at which point we try to steal unused bw from the other
> pipes for the pipe that ran out. Ie. the global serialization
> would only happen when someone is actually pushing the limits.
> 
> -- 
> Ville Syrjälä
> Intel
Clint Taylor May 17, 2019, 8:26 p.m. UTC | #10
Nit: Commit message V4 and Patch Subject V3

Acked-by: Clint Taylor <Clinton.A.Taylor@intel.com>

-Clint


On 5/3/19 12:08 PM, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
>
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
>
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
>
> TODO: make it robust and deal with all the open questions
>
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>      If the system has 2133MT/s memory then we pointlessly
>      wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>      that using hardcoded numbers
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile             |   1 +
>   drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_drv.h           |  10 +
>   drivers/gpu/drm/i915/i915_reg.h           |   3 +
>   drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>   drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>   drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>   drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>   drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>   drivers/gpu/drm/i915/intel_drv.h          |   2 +
>   10 files changed, 533 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>   create mode 100644 drivers/gpu/drm/i915/intel_bw.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>   	  intel_atomic.o \
>   	  intel_atomic_plane.o \
>   	  intel_bios.o \
> +	  intel_bw.o \
>   	  intel_cdclk.o \
>   	  intel_color.o \
>   	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>   #include "intel_overlay.h"
>   #include "intel_pipe_crc.h"
>   #include "intel_pm.h"
> +#include "intel_sideband.h"
>   #include "intel_sprite.h"
>   #include "intel_uc.h"
>   
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
>   	return 0;
>   }
>   
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}
> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};
> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp = &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}
> +
>   static void
>   intel_get_dram_info(struct drm_i915_private *dev_priv)
>   {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>   	 */
>   	intel_get_dram_info(dev_priv);
>   
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>   #include <drm/drm_cache.h>
>   #include <drm/drm_util.h>
>   #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>   #include <drm/drm_connector.h>
>   #include <drm/i915_mei_hdcp_interface.h>
>   
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>   		} type;
>   	} dram_info;
>   
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>   	struct i915_runtime_pm runtime_pm;
>   
>   	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>   extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>   extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>   int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>   
>   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>   #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>   #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>   #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
>   #define   GEN6_PCODE_READ_D_COMP		0x10
>   #define   GEN6_PCODE_WRITE_D_COMP		0x11
>   #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
>   	drm_atomic_helper_plane_destroy_state(plane, state);
>   }
>   
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];
> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>   int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
>   					struct intel_crtc_state *new_crtc_state,
>   					const struct intel_plane_state *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>   	new_crtc_state->active_planes &= ~BIT(plane->id);
>   	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>   	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>   	new_plane_state->base.visible = false;
>   
>   	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>   	if (new_plane_state->base.visible || old_plane_state->base.visible)
>   		new_crtc_state->update_planes |= BIT(plane->id);
>   
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>   	return intel_plane_atomic_calc_changes(old_crtc_state,
>   					       &new_crtc_state->base,
>   					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>   
>   extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>   
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state);
>   void intel_update_plane(struct intel_plane *plane,
>   			const struct intel_crtc_state *crtc_state,
>   			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not not cause bandwidth problems.
> +	 */
> +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> +					       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> +				       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state);
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state);
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate, num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +
> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>   #include "intel_acpi.h"
>   #include "intel_atomic.h"
>   #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>   #include "intel_color.h"
>   #include "intel_cdclk.h"
>   #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
>   
>   	intel_set_plane_visible(crtc_state, plane_state, false);
>   	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>   
>   	if (plane->id == PLANE_PRIMARY)
>   		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>   	struct intel_encoder *encoder;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>   	enum intel_display_power_domain domain;
>   	struct intel_plane *plane;
>   	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>   	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>   	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>   	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>   }
>   
>   /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
>   	if (!is_crtc_enabled) {
>   		plane_state->visible = visible = false;
>   		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
>   	}
>   
>   	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
>   		return ret;
>   
>   	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>   }
>   
>   static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
>   
>   	drm_mode_config_init(dev);
>   
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>   	dev->mode_config.min_width = 0;
>   	dev->mode_config.min_height = 0;
>   
> @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>   	drm_connector_list_iter_end(&conn_iter);
>   
>   	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>   		struct intel_crtc_state *crtc_state =
>   			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>   		int min_cdclk = 0;
>   
>   		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>   		dev_priv->min_voltage_level[crtc->pipe] =
>   			crtc_state->min_voltage_level;
>   
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane->base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>   		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>   
>   	struct intel_crtc_wm_state wm;
>   
> +	u32 data_rate[I915_MAX_PLANES];
> +
>   	/* Gamma mode programmed on the pipe */
>   	u32 gamma_mode;
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 68106fe35a04..139a0fc19390 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -138,6 +138,7 @@  i915-y += intel_audio.o \
 	  intel_atomic.o \
 	  intel_atomic_plane.o \
 	  intel_bios.o \
+	  intel_bw.o \
 	  intel_cdclk.o \
 	  intel_color.o \
 	  intel_combo_phy.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5ed864752c7b..b7fa7b51c2e2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -70,6 +70,7 @@ 
 #include "intel_overlay.h"
 #include "intel_pipe_crc.h"
 #include "intel_pm.h"
+#include "intel_sideband.h"
 #include "intel_sprite.h"
 #include "intel_uc.h"
 
@@ -1435,6 +1436,232 @@  bxt_get_dram_info(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+struct intel_qgv_point {
+	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
+};
+
+struct intel_sagv_info {
+	struct intel_qgv_point points[3];
+	u8 num_points;
+	u8 num_channels;
+	u8 t_bl;
+	enum intel_dram_type dram_type;
+};
+
+static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
+					  struct intel_sagv_info *si)
+{
+	u32 val = 0;
+	int ret;
+
+	ret = sandybridge_pcode_read(dev_priv,
+				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
+				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
+				     &val, NULL);
+	if (ret)
+		return ret;
+
+	switch (val & 0xf) {
+	case 0:
+		si->dram_type = INTEL_DRAM_DDR4;
+		break;
+	case 1:
+		si->dram_type = INTEL_DRAM_DDR3;
+		break;
+	case 2:
+		si->dram_type = INTEL_DRAM_LPDDR3;
+		break;
+	case 3:
+		si->dram_type = INTEL_DRAM_LPDDR3;
+		break;
+	default:
+		MISSING_CASE(val & 0xf);
+		break;
+	}
+
+	si->num_channels = (val & 0xf0) >> 4;
+	si->num_points = (val & 0xf00) >> 8;
+
+	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
+
+	return 0;
+}
+
+static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
+					 struct intel_qgv_point *sp,
+					 int point)
+{
+	u32 val = 0, val2;
+	int ret;
+
+	ret = sandybridge_pcode_read(dev_priv,
+				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
+				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
+				     &val, &val2);
+	if (ret)
+		return ret;
+
+	sp->dclk = val & 0xffff;
+	sp->t_rp = (val & 0xff0000) >> 16;
+	sp->t_rcd = (val & 0xff000000) >> 24;
+
+	sp->t_rdpre = val2 & 0xff;
+	sp->t_ras = (val2 & 0xff00) >> 8;
+
+	sp->t_rc = sp->t_rp + sp->t_ras;
+
+	return 0;
+}
+
+static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
+			      struct intel_sagv_info *si)
+{
+	int i, ret;
+
+	ret = icl_pcode_read_mem_global_info(dev_priv, si);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
+		si->num_points = ARRAY_SIZE(si->points);
+
+	for (i = 0; i < si->num_points; i++) {
+		struct intel_qgv_point *sp = &si->points[i];
+
+		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
+		if (ret)
+			return ret;
+
+		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
+			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
+			      sp->t_rcd, sp->t_rc);
+	}
+
+	return 0;
+}
+
+static int icl_calc_bw(int dclk, int num, int den)
+{
+	/* multiples of 16.666MHz (100/6) */
+	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
+}
+
+static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
+{
+	u16 dclk = 0;
+	int i;
+
+	for (i = 0; i < si->num_points; i++)
+		dclk = max(dclk, si->points[i].dclk);
+
+	return dclk;
+}
+
+struct intel_sa_info {
+	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
+};
+
+static const struct intel_sa_info icl_sa_info = {
+	.deburst = 8,
+	.mpagesize = 16,
+	.deprogbwlimit = 25, /* GB/s */
+	.displayrtids = 128,
+};
+
+static int icl_get_bw_info(struct drm_i915_private *dev_priv)
+{
+	struct intel_sagv_info si = {};
+	const struct intel_sa_info *sa = &icl_sa_info;
+	bool is_y_tile = true; /* assume y tile may be used */
+	int num_channels;
+	int deinterleave;
+	int ipqdepth, ipqdepthpch;
+	int dclk_max;
+	int maxdebw;
+	int i, ret;
+
+	ret = icl_get_qgv_points(dev_priv, &si);
+	if (ret)
+		return ret;
+	num_channels = si.num_channels;
+
+	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
+	dclk_max = icl_sagv_max_dclk(&si);
+
+	ipqdepthpch = 16;
+
+	maxdebw = min(sa->deprogbwlimit * 1000,
+		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
+	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		struct intel_bw_info *bi = &dev_priv->max_bw[i];
+		int clpchgroup;
+		int j;
+
+		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
+		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
+
+		for (j = 0; j < si.num_points; j++) {
+			const struct intel_qgv_point *sp = &si.points[j];
+			int ct, bw;
+
+			/*
+			 * Max row cycle time
+			 *
+			 * FIXME what is the logic behind the
+			 * assumed burst length?
+			 */
+			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
+				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
+			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
+
+			bi->deratedbw[j] = min(maxdebw,
+					       bw * 9 / 10); /* 90% */
+
+			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
+				      i, j, bi->num_planes, bi->deratedbw[j]);
+		}
+
+		if (bi->num_planes == 1)
+			break;
+	}
+
+	return 0;
+}
+
+static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
+			       int num_planes, int qgv_point)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		const struct intel_bw_info *bi =
+			&dev_priv->max_bw[i];
+
+		if (num_planes >= bi->num_planes)
+			return bi->deratedbw[qgv_point];
+	}
+
+	return 0;
+}
+
+unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
+				 int num_planes)
+{
+	if (IS_ICELAKE(dev_priv))
+		/*
+		 * FIXME with SAGV disabled maybe we can assume
+		 * point 1 will always be used? Seems to match
+		 * the behaviour observed in the wild.
+		 */
+		return min3(icl_max_bw(dev_priv, num_planes, 0),
+			    icl_max_bw(dev_priv, num_planes, 1),
+			    icl_max_bw(dev_priv, num_planes, 2));
+	else
+		return UINT_MAX;
+}
+
 static void
 intel_get_dram_info(struct drm_i915_private *dev_priv)
 {
@@ -1655,6 +1882,8 @@  static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	 */
 	intel_get_dram_info(dev_priv);
 
+	if (INTEL_GEN(dev_priv) >= 11)
+		icl_get_bw_info(dev_priv);
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 64fa353a62bb..d1b9c3fe5802 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -54,6 +54,7 @@ 
 #include <drm/drm_cache.h>
 #include <drm/drm_util.h>
 #include <drm/drm_dsc.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_connector.h>
 #include <drm/i915_mei_hdcp_interface.h>
 
@@ -1837,6 +1838,13 @@  struct drm_i915_private {
 		} type;
 	} dram_info;
 
+	struct intel_bw_info {
+		int num_planes;
+		int deratedbw[3];
+	} max_bw[6];
+
+	struct drm_private_obj bw_obj;
+
 	struct i915_runtime_pm runtime_pm;
 
 	struct {
@@ -2706,6 +2714,8 @@  extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
+unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
+				 int num_planes);
 
 u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e97c47fca645..399366a41524 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8774,6 +8774,9 @@  enum {
 #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
 #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
 #define   GEN6_READ_OC_PARAMS			0xc
+#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
+#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
+#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
 #define   GEN6_PCODE_READ_D_COMP		0x10
 #define   GEN6_PCODE_WRITE_D_COMP		0x11
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index d11681d71add..f142c5c22d7e 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -114,6 +114,22 @@  intel_plane_destroy_state(struct drm_plane *plane,
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
+unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state)
+{
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int cpp = 0;
+	int i;
+
+	if (!plane_state->base.visible)
+		return 0;
+
+	for (i = 0; i < fb->format->num_planes; i++)
+		cpp += fb->format->cpp[i];
+
+	return cpp * crtc_state->pixel_rate;
+}
+
 int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
 					struct intel_crtc_state *new_crtc_state,
 					const struct intel_plane_state *old_plane_state,
@@ -125,6 +141,7 @@  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	new_crtc_state->active_planes &= ~BIT(plane->id);
 	new_crtc_state->nv12_planes &= ~BIT(plane->id);
 	new_crtc_state->c8_planes &= ~BIT(plane->id);
+	new_crtc_state->data_rate[plane->id] = 0;
 	new_plane_state->base.visible = false;
 
 	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
@@ -149,6 +166,9 @@  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	if (new_plane_state->base.visible || old_plane_state->base.visible)
 		new_crtc_state->update_planes |= BIT(plane->id);
 
+	new_crtc_state->data_rate[plane->id] =
+		intel_plane_data_rate(new_crtc_state, new_plane_state);
+
 	return intel_plane_atomic_calc_changes(old_crtc_state,
 					       &new_crtc_state->base,
 					       old_plane_state,
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
index 14678620440f..0a9651376d0e 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.h
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
@@ -15,6 +15,8 @@  struct intel_plane_state;
 
 extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
 
+unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state);
 void intel_update_plane(struct intel_plane *plane,
 			const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state);
diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
new file mode 100644
index 000000000000..304bf87f0a2e
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_bw.c
@@ -0,0 +1,181 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_atomic_state_helper.h>
+
+#include "intel_bw.h"
+#include "intel_drv.h"
+
+static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
+{
+	/*
+	 * We assume cursors are small enough
+	 * to not not cause bandwidth problems.
+	 */
+	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
+}
+
+static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	unsigned int data_rate = 0;
+	enum plane_id plane_id;
+
+	for_each_plane_id_on_crtc(crtc, plane_id) {
+		/*
+		 * We assume cursors are small enough
+		 * to not not cause bandwidth problems.
+		 */
+		if (plane_id == PLANE_CURSOR)
+			continue;
+
+		data_rate += crtc_state->data_rate[plane_id];
+	}
+
+	return data_rate;
+}
+
+void intel_bw_crtc_update(struct intel_bw_state *bw_state,
+			  const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+
+	bw_state->data_rate[crtc->pipe] =
+		intel_bw_crtc_data_rate(crtc_state);
+	bw_state->num_active_planes[crtc->pipe] =
+		intel_bw_crtc_num_active_planes(crtc_state);
+
+	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
+		      pipe_name(crtc->pipe),
+		      bw_state->data_rate[crtc->pipe],
+		      bw_state->num_active_planes[crtc->pipe]);
+}
+
+static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
+					       const struct intel_bw_state *bw_state)
+{
+	unsigned int num_active_planes = 0;
+	enum pipe pipe;
+
+	for_each_pipe(dev_priv, pipe)
+		num_active_planes += bw_state->num_active_planes[pipe];
+
+	return num_active_planes;
+}
+
+static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
+				       const struct intel_bw_state *bw_state)
+{
+	unsigned int data_rate = 0;
+	enum pipe pipe;
+
+	for_each_pipe(dev_priv, pipe)
+		data_rate += bw_state->data_rate[pipe];
+
+	return data_rate;
+}
+
+int intel_bw_atomic_check(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
+	struct intel_bw_state *bw_state = NULL;
+	unsigned int data_rate, max_data_rate;
+	unsigned int num_active_planes;
+	struct intel_crtc *crtc;
+	int i;
+
+	/* FIXME earlier gens need some checks too */
+	if (INTEL_GEN(dev_priv) < 11)
+		return 0;
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		unsigned int old_data_rate =
+			intel_bw_crtc_data_rate(old_crtc_state);
+		unsigned int new_data_rate =
+			intel_bw_crtc_data_rate(new_crtc_state);
+		unsigned int old_active_planes =
+			intel_bw_crtc_num_active_planes(old_crtc_state);
+		unsigned int new_active_planes =
+			intel_bw_crtc_num_active_planes(new_crtc_state);
+
+		/*
+		 * Avoid locking the bw state when
+		 * nothing significant has changed.
+		 */
+		if (old_data_rate == new_data_rate &&
+		    old_active_planes == new_active_planes)
+			continue;
+
+		bw_state  = intel_atomic_get_bw_state(state);
+		if (IS_ERR(bw_state))
+			return PTR_ERR(bw_state);
+
+		bw_state->data_rate[crtc->pipe] = new_data_rate;
+		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
+
+		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
+			      pipe_name(crtc->pipe),
+			      bw_state->data_rate[crtc->pipe],
+			      bw_state->num_active_planes[crtc->pipe]);
+	}
+
+	if (!bw_state)
+		return 0;
+
+	data_rate = intel_bw_data_rate(dev_priv, bw_state);
+	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
+
+	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
+
+	data_rate = DIV_ROUND_UP(data_rate, 1000);
+
+	if (data_rate > max_data_rate) {
+		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
+			      data_rate, max_data_rate, num_active_planes);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
+{
+	struct intel_bw_state *state;
+
+	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+	return &state->base;
+}
+
+static void intel_bw_destroy_state(struct drm_private_obj *obj,
+				   struct drm_private_state *state)
+{
+	kfree(state);
+}
+
+static const struct drm_private_state_funcs intel_bw_funcs = {
+	.atomic_duplicate_state = intel_bw_duplicate_state,
+	.atomic_destroy_state = intel_bw_destroy_state,
+};
+
+int intel_bw_init(struct drm_i915_private *dev_priv)
+{
+	struct intel_bw_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
+				    &state->base, &intel_bw_funcs);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
new file mode 100644
index 000000000000..c14272ca5b59
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_bw.h
@@ -0,0 +1,46 @@ 
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_BW_H__
+#define __INTEL_BW_H__
+
+#include <drm/drm_atomic.h>
+
+#include "i915_drv.h"
+#include "intel_display.h"
+
+struct drm_i915_private;
+struct intel_atomic_state;
+struct intel_crtc_state;
+
+struct intel_bw_state {
+	struct drm_private_state base;
+
+	unsigned int data_rate[I915_MAX_PIPES];
+	u8 num_active_planes[I915_MAX_PIPES];
+};
+
+#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
+
+static inline struct intel_bw_state *
+intel_atomic_get_bw_state(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_private_state *bw_state;
+
+	bw_state = drm_atomic_get_private_obj_state(&state->base,
+						    &dev_priv->bw_obj);
+	if (IS_ERR(bw_state))
+		return ERR_CAST(bw_state);
+
+	return to_intel_bw_state(bw_state);
+}
+
+int intel_bw_init(struct drm_i915_private *dev_priv);
+int intel_bw_atomic_check(struct intel_atomic_state *state);
+void intel_bw_crtc_update(struct intel_bw_state *bw_state,
+			  const struct intel_crtc_state *crtc_state);
+
+#endif /* __INTEL_BW_H__ */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d81ec80e34f6..a955840b73cb 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -50,6 +50,7 @@ 
 #include "intel_acpi.h"
 #include "intel_atomic.h"
 #include "intel_atomic_plane.h"
+#include "intel_bw.h"
 #include "intel_color.h"
 #include "intel_cdclk.h"
 #include "intel_crt.h"
@@ -2863,6 +2864,7 @@  static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 
 	intel_set_plane_visible(crtc_state, plane_state, false);
 	fixup_active_planes(crtc_state);
+	crtc_state->data_rate[plane->id] = 0;
 
 	if (plane->id == PLANE_PRIMARY)
 		intel_pre_disable_primary_noatomic(&crtc->base);
@@ -6590,6 +6592,8 @@  static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct intel_bw_state *bw_state =
+		to_intel_bw_state(dev_priv->bw_obj.state);
 	enum intel_display_power_domain domain;
 	struct intel_plane *plane;
 	u64 domains;
@@ -6652,6 +6656,9 @@  static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
 	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
 	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
+
+	bw_state->data_rate[intel_crtc->pipe] = 0;
+	bw_state->num_active_planes[intel_crtc->pipe] = 0;
 }
 
 /*
@@ -11176,6 +11183,7 @@  int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
 	if (!is_crtc_enabled) {
 		plane_state->visible = visible = false;
 		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
+		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
 	}
 
 	if (!was_visible && !visible)
@@ -13296,7 +13304,15 @@  static int intel_atomic_check(struct drm_device *dev,
 		return ret;
 
 	intel_fbc_choose_crtc(dev_priv, intel_state);
-	return calc_watermark_data(intel_state);
+	ret = calc_watermark_data(intel_state);
+	if (ret)
+		return ret;
+
+	ret = intel_bw_atomic_check(intel_state);
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 static int intel_atomic_prepare_commit(struct drm_device *dev,
@@ -15696,6 +15712,10 @@  int intel_modeset_init(struct drm_device *dev)
 
 	drm_mode_config_init(dev);
 
+	ret = intel_bw_init(dev_priv);
+	if (ret)
+		return ret;
+
 	dev->mode_config.min_width = 0;
 	dev->mode_config.min_height = 0;
 
@@ -16318,8 +16338,11 @@  static void intel_modeset_readout_hw_state(struct drm_device *dev)
 	drm_connector_list_iter_end(&conn_iter);
 
 	for_each_intel_crtc(dev, crtc) {
+		struct intel_bw_state *bw_state =
+			to_intel_bw_state(dev_priv->bw_obj.state);
 		struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
+		struct intel_plane *plane;
 		int min_cdclk = 0;
 
 		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
@@ -16358,6 +16381,21 @@  static void intel_modeset_readout_hw_state(struct drm_device *dev)
 		dev_priv->min_voltage_level[crtc->pipe] =
 			crtc_state->min_voltage_level;
 
+		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+			const struct intel_plane_state *plane_state =
+				to_intel_plane_state(plane->base.state);
+
+			/*
+			 * FIXME don't have the fb yet, so can't
+			 * use intel_plane_data_rate() :(
+			 */
+			if (plane_state->base.visible)
+				crtc_state->data_rate[plane->id] =
+					4 * crtc_state->pixel_rate;
+		}
+
+		intel_bw_crtc_update(bw_state, crtc_state);
+
 		intel_pipe_config_sanity_check(dev_priv, crtc_state);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4049e03d2c0d..47f551601a05 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -885,6 +885,8 @@  struct intel_crtc_state {
 
 	struct intel_crtc_wm_state wm;
 
+	u32 data_rate[I915_MAX_PLANES];
+
 	/* Gamma mode programmed on the pipe */
 	u32 gamma_mode;