
drm/i915: Fix FIFO underruns caused by missing cumulative bpp W/A

Message ID 20220407084235.9526-1-stanislav.lisovskiy@intel.com (mailing list archive)
State New, archived
Series drm/i915: Fix FIFO underruns caused by missing cumulative bpp W/A

Commit Message

Lisovskiy, Stanislav April 7, 2022, 8:42 a.m. UTC
We had some FIFO underruns appearing on platforms like ADL
which could be fixed by increasing CDCLK; however, we were
lacking an explanation for that: we were not calculating CDCLK
based also on the cumulative bpp W/A formula mentioned in BSpec 64631.

With this patch no FIFO underruns appear anymore.

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
---
 drivers/gpu/drm/i915/display/intel_bw.c | 71 ++++++++++++++++++++++---
 drivers/gpu/drm/i915/display/intel_bw.h |  2 +
 2 files changed, 67 insertions(+), 6 deletions(-)

Comments

Ville Syrjälä April 7, 2022, 11:10 a.m. UTC | #1
On Thu, Apr 07, 2022 at 11:42:35AM +0300, Stanislav Lisovskiy wrote:
> We had some FIFO underruns appearing on platforms like ADL
> which could be fixed by increasing CDCLK; however, we were
> lacking an explanation for that: we were not calculating CDCLK
> based also on the cumulative bpp W/A formula mentioned in BSpec 64631.

We already have that in intel_bw_crtc_min_cdclk().

Lisovskiy, Stanislav April 7, 2022, 11:24 a.m. UTC | #2
On Thu, Apr 07, 2022 at 02:10:52PM +0300, Ville Syrjälä wrote:
> On Thu, Apr 07, 2022 at 11:42:35AM +0300, Stanislav Lisovskiy wrote:
> > We had some FIFO underruns appearing on platforms like ADL
> > which could be fixed by increasing CDCLK; however, we were
> > lacking an explanation for that: we were not calculating CDCLK
> > based also on the cumulative bpp W/A formula mentioned in BSpec 64631.
> 
> We already have that in intel_bw_crtc_min_cdclk().

It actually is not quite what BSpec is talking about: it adds
data_rate per plane instead of bpp. I think it confuses
these two from BSpec:

"
Plane required bandwidth MB/s = pixel rate MHz * source pixel format in bytes 
* plane down scale amount * pipe down scale amount
Display required memory bandwidth MB/s += Plane required bandwidth
Pipe cumulative bytes per pixel += plane source pixel format in bytes
"

Then we have two different formulas used to estimate what the CDCLK
should be. One is "DBUF maximum data buffer bandwidth MB/s = CDCLK frequency MHz * 64 Bytes".

The other is pipe CDCLK = cumulative bytes per pixel * (pixel rate MHz *
plane down scale amount * pipe down scale amount) * 51.2
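
For illustration, here is a minimal standalone sketch of how the two
estimations get combined. The input values and their units are purely
hypothetical assumptions (not taken from BSpec or from any measured
configuration); only the integer arithmetic mirrors the patch:

/*
 * Standalone sketch only: illustrates the two CDCLK estimations that
 * the patch compares in intel_bw_dbuf_min_cdclk().  All input numbers
 * below are made-up assumptions.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* hypothetical inputs */
	unsigned long long pixel_rate = 594000;      /* adjusted pixel rate */
	unsigned long long cumulative_bpp = 4 + 4;   /* two visible 32bpp planes */
	unsigned long long total_max_bw = 12000000;  /* worst DBuf slice BW */

	/* "Maximum Data Buffer Bandwidth": DBuf BW = CDCLK * 64 bytes */
	unsigned long long dbuf_bw_cdclk = DIV_ROUND_UP(total_max_bw, 64);

	/*
	 * Cumulative bpp W/A, same integer arithmetic as the patch:
	 * pixel_rate * cumulative_bpp * 512 / 10 / 1000, i.e. * 51.2 / 1000
	 */
	unsigned long long bpp_cdclk =
		DIV_ROUND_UP(pixel_rate * cumulative_bpp * 512, 10) / 1000;

	/* the patch takes the more demanding (worst case) of the two */
	printf("dbuf_bw_cdclk=%llu bpp_cdclk=%llu min_cdclk=%llu\n",
	       dbuf_bw_cdclk, bpp_cdclk,
	       dbuf_bw_cdclk > bpp_cdclk ? dbuf_bw_cdclk : bpp_cdclk);

	return 0;
}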

As I checked with some tests in kms_plane_multiple, the estimation
from the latter was sometimes higher than the one we currently have, and
it happened exactly when we did have FIFO underruns; otherwise CDCLK
stayed pretty much the same, which also shows that there is something
obviously wrong with the current calculations.

That patch fixes it and we don't have underruns after it is applied.


Stan

Lisovskiy, Stanislav April 7, 2022, 11:39 a.m. UTC | #3
On Thu, Apr 07, 2022 at 02:10:52PM +0300, Ville Syrjälä wrote:
> On Thu, Apr 07, 2022 at 11:42:35AM +0300, Stanislav Lisovskiy wrote:
> > We had some FIFO underruns appearing on platforms like ADL
> > which could be fixed by increasing CDCLK; however, we were
> > lacking an explanation for that: we were not calculating CDCLK
> > based also on the cumulative bpp W/A formula mentioned in BSpec 64631.
> 
> We already have that in intel_bw_crtc_min_cdclk().


One more remark: actually the data rate should indeed be fine
for that, but it looks like for some reason the calculations we
currently have are not pessimistic enough.
It could be because mine are lacking the scaling factor - but in kms_plane_multiple
we don't use scaling. I wonder if it's those multi-planar formats,
because as far as I can see you don't add those for gens >= 11.
Or it could be something else - there must be something different, because
it now manages to raise CDCLK exactly at the "right" moments to avoid
FIFO underruns.

Stan

Ville Syrjälä April 7, 2022, 12:04 p.m. UTC | #4
On Thu, Apr 07, 2022 at 02:24:17PM +0300, Lisovskiy, Stanislav wrote:
> On Thu, Apr 07, 2022 at 02:10:52PM +0300, Ville Syrjälä wrote:
> > On Thu, Apr 07, 2022 at 11:42:35AM +0300, Stanislav Lisovskiy wrote:
> > > We had some FIFO underruns appearing on platforms like ADL
> > > which could be fixed by increasing CDCLK; however, we were
> > > lacking an explanation for that: we were not calculating CDCLK
> > > based also on the cumulative bpp W/A formula mentioned in BSpec 64631.
> > 
> > We already have that in intel_bw_crtc_min_cdclk().
> 
> It actually is not quite what BSpec is talking about: it adds
> data_rate per plane instead of bpp. I think it confuses
> these two from BSpec:
> 
> "
> Plane required bandwidth MB/s = pixel rate MHz * source pixel format in bytes 
> * plane down scale amount * pipe down scale amount
> Display required memory bandwidth MB/s += Plane required bandwidth
> Pipe cumulative bytes per pixel += plane source pixel format in bytes
> "
> 
> Then we have two different formulas used to estimate what the CDCLK
> should be. One is "DBUF maximum data buffer bandwidth MB/s = CDCLK frequency MHz * 64 Bytes".
> 
> The other is pipe CDCLK = cumulative bytes per pixel * (pixel rate MHz *
> plane down scale amount * pipe down scale amount) * 51.2

That specific statement in the spec is kinda nonsense. Mixing
"plane up/down scale amount" (which is per plane) with this
"cumulative bytes per pixel" thing (which is sum of bytes per pixel
across all planes) doesn't make sense.

What we do is sum bytes_per_pixel*pixel_rate*plane_up/down_scale from
all planes, which I think is what the spec is trying to say, as that
is the cumulative bw used up by all the planes.
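
To make that concrete, a rough standalone sketch of such a per-plane
summation could look as follows. The structures and numbers are
assumptions for illustration only, not the actual driver code:

/*
 * Sketch only: the per-pipe accounting described above, i.e. summing
 * bpp * pixel_rate (with scaling already folded into pixel_rate) over
 * all planes.  The types and numbers are assumptions for illustration.
 */
#include <stdio.h>

struct example_plane {
	unsigned int bpp;		/* source pixel format in bytes */
	unsigned int pixel_rate;	/* plane pixel rate, scaling included */
};

static unsigned long long pipe_data_rate(const struct example_plane *planes,
					 int num_planes)
{
	unsigned long long data_rate = 0;
	int i;

	for (i = 0; i < num_planes; i++)
		data_rate += (unsigned long long)planes[i].bpp *
			     planes[i].pixel_rate;

	return data_rate;
}

int main(void)
{
	/* hypothetical two-plane pipe */
	struct example_plane planes[] = {
		{ .bpp = 4, .pixel_rate = 594000 },
		{ .bpp = 4, .pixel_rate = 148500 },
	};

	printf("cumulative pipe data rate: %llu\n",
	       pipe_data_rate(planes, 2));

	return 0;
}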

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 37bd7b17f3d0..3a2aeeffee7c 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -743,20 +743,51 @@  static void skl_plane_calc_dbuf_bw(struct intel_bw_state *bw_state,
 	}
 }
 
-static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
+static int intel_plane_bw_bpp(const struct drm_format_info *info)
+{
+	/*
+	 * For the purposes of memory bandwidth calculations,
+	 * planar formats are treated as if both planes had the
+	 * same bpp (with no reduction for vertical
+	 * subsampling). I.e we take as usual the worst case
+	 * scenario.
+	 */
+	if (drm_format_info_is_yuv_semiplanar(info))
+		return 2 * max(info->cpp[0], info->cpp[1]);
+
+	return info->cpp[0];
+}
+
+static void skl_crtc_calc_dbuf_bw(struct intel_atomic_state *state,
+				  struct intel_bw_state *bw_state,
 				  const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
-	enum plane_id plane_id;
+	struct intel_plane *plane;
 
 	memset(crtc_bw, 0, sizeof(*crtc_bw));
 
 	if (!crtc_state->hw.active)
 		return;
 
-	for_each_plane_id_on_crtc(crtc, plane_id) {
+	for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) {
+		const struct drm_framebuffer *fb;
+		enum plane_id plane_id = plane->id;
+		unsigned int plane_bpp = 0;
+		struct intel_plane_state *plane_state =
+			intel_atomic_get_new_plane_state(state, plane);
+
+		if (plane_state) {
+			fb = plane_state->hw.fb;
+
+			if (plane_state->uapi.visible && fb)
+				plane_bpp = intel_plane_bw_bpp(fb->format);
+		}
+
+		crtc_bw->pipe_cumulative_bpp += plane_bpp;
+
 		/*
 		 * We assume cursors are small enough
 		 * to not cause bandwidth problems.
@@ -773,6 +804,10 @@  static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
 					       &crtc_state->wm.skl.plane_ddb_y[plane_id],
 					       crtc_state->data_rate[plane_id]);
 	}
+
+	crtc_bw->bpp_cdclk = DIV_ROUND_UP_ULL(mul_u32_u32(crtc_state->pixel_rate,
+					      crtc_bw->pipe_cumulative_bpp * 512),
+					      10) / 1000;
 }
 
 /* "Maximum Data Buffer Bandwidth" */
@@ -782,11 +817,13 @@  intel_bw_dbuf_min_cdclk(struct drm_i915_private *i915,
 {
 	unsigned int total_max_bw = 0;
 	enum dbuf_slice slice;
+	enum pipe pipe;
+	unsigned int bpp_cdclk = 0;
+	unsigned int dbuf_bw_cdclk;
 
 	for_each_dbuf_slice(i915, slice) {
 		int num_active_planes = 0;
 		unsigned int max_bw = 0;
-		enum pipe pipe;
 
 		/*
 		 * The arbiter can only really guarantee an
@@ -803,7 +840,29 @@  intel_bw_dbuf_min_cdclk(struct drm_i915_private *i915,
 		total_max_bw = max(total_max_bw, max_bw);
 	}
 
-	return DIV_ROUND_UP(total_max_bw, 64);
+	for_each_pipe(i915, pipe) {
+		const struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[pipe];
+		/*
+		 * From BSpec 64631:
+		 * Pipe cumulative bytes should be less or equal to
+		 * CDCLK / (pixel_rate * scaling_factors * 51.2) thus
+		 * CDCLK = pipe_cumulative_bpp * pixel_rate * scaling_factors * 51.2.
+		 * Considering that intel_plane_pixel_rate already returns adjusted pixel rate,
+		 * no scaling factors needed here.
+		 */
+		bpp_cdclk = max_t(unsigned int, crtc_bw->bpp_cdclk,
+						bpp_cdclk);
+	}
+
+	dbuf_bw_cdclk = DIV_ROUND_UP(total_max_bw, 64);
+
+	/*
+	 * So now we have two CDCLK estimations:
+	 * one is based on required DBuf BW and another is
+	 * based on pipe cumulative bpp W/A(BSpec 64631)
+	 * Traditionally take the more demanding into use(worst case)
+	 */
+	return max_t(unsigned int, dbuf_bw_cdclk, bpp_cdclk);
 }
 
 int intel_bw_min_cdclk(struct drm_i915_private *i915,
@@ -842,7 +901,7 @@  int intel_bw_calc_min_cdclk(struct intel_atomic_state *state,
 
 		old_bw_state = intel_atomic_get_old_bw_state(state);
 
-		skl_crtc_calc_dbuf_bw(new_bw_state, crtc_state);
+		skl_crtc_calc_dbuf_bw(state, new_bw_state, crtc_state);
 
 		new_bw_state->min_cdclk[crtc->pipe] =
 			intel_bw_crtc_min_cdclk(crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
index cb7ee3a24a58..9e3a6ad03b19 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.h
+++ b/drivers/gpu/drm/i915/display/intel_bw.h
@@ -19,6 +19,8 @@  struct intel_crtc_state;
 struct intel_dbuf_bw {
 	unsigned int max_bw[I915_MAX_DBUF_SLICES];
 	u8 active_planes[I915_MAX_DBUF_SLICES];
+	unsigned int pipe_cumulative_bpp;
+	unsigned int bpp_cdclk;
 };
 
 struct intel_bw_state {