diff mbox series

drm/i915: Ensure DSC has enough BW and stays within HW limits

Message ID 20230306080401.22552-1-stanislav.lisovskiy@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Ensure DSC has enough BW and stays within HW limits | expand

Commit Message

Lisovskiy, Stanislav March 6, 2023, 8:04 a.m. UTC
We have currently the issue with some BPPs when using DSC.
According the HW team the reason is that single VDSC engine instance,
has some BW limitations which have to be accounted, so whenever
we approach around 90% of the CDCLK, second VDSC engine have to be
used. Also that always means using 2 slices, however in our current code
amount of slices is calculated for some reason independently of
whether we need to enable 2nd VDSC engine or not, thus leading to
some logical issues, when according to pixel clock needs we need to enable
2nd VDSC engine, however as we calculated previously that we can use
only single slice, we can't do that and fail.
So we need to fix that, so that amount of VDSC engines enabled should depend
on amount of slices and amount of slices should also depend on BW requirements.
Lastly we didn't have BPP limitation for ADLP/MTL/DG2 implemented which says
that DSC output BPP's can only be chosen within range of 8 to 27(BSpec 49259).
This all applied together allows to fix existing FIFO underruns, which we
have in many DSC tests.

BSpec: 49259
HSDES: 18027167222

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

Comments

Vinod Govindapillai March 6, 2023, 12:01 p.m. UTC | #1
Hi Stan

On Mon, 2023-03-06 at 10:04 +0200, Stanislav Lisovskiy wrote:
> We have currently the issue with some BPPs when using DSC.
> According the HW team the reason is that single VDSC engine instance,
> has some BW limitations which have to be accounted, so whenever
> we approach around 90% of the CDCLK, second VDSC engine have to be
> used. Also that always means using 2 slices, however in our current code
> amount of slices is calculated for some reason independently of
> whether we need to enable 2nd VDSC engine or not, thus leading to
> some logical issues, when according to pixel clock needs we need to enable
> 2nd VDSC engine, however as we calculated previously that we can use
> only single slice, we can't do that and fail.
> So we need to fix that, so that amount of VDSC engines enabled should depend
> on amount of slices and amount of slices should also depend on BW requirements.
> Lastly we didn't have BPP limitation for ADLP/MTL/DG2 implemented which says
> that DSC output BPP's can only be chosen within range of 8 to 27(BSpec 49259).
> This all applied together allows to fix existing FIFO underruns, which we
> have in many DSC tests.
> 
> BSpec: 49259
> HSDES: 18027167222
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_dp.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> index aee93b0d810e..e3680ae95b83 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -687,6 +687,12 @@ u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp,
> u32 p
>         /* From XE_LPD onwards we support from bpc upto uncompressed bpp-1 BPPs */
>         if (DISPLAY_VER(i915) >= 13) {
>                 bits_per_pixel = min(bits_per_pixel, pipe_bpp - 1);
> +
> +               /* According to BSpec, 27 is the max DSC output bpp */
> +               bits_per_pixel = min(bits_per_pixel, (u32)27);
> +
> +               /* According to BSpec, 8 is the min DSC output bpp */
> +               bits_per_pixel = max(bits_per_pixel, (u32)8);

Just before this loop, there is check if if (bits_per_pixel < valid_dsc_bpp[0]) then it returns as
"unsupported BPP". The valid_dsc_bpp[0] is 6. Should this need to be updated to 8 for disp_ver >= 13
onward? Please check.. 


Reviewed-by: Vinod Govindapillai <vinod.govindapillai@intel.com>


>         } else {
>                 /* Find the nearest match in the array of known BPPs from VESA */
>                 for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) {
> @@ -771,6 +777,9 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
>                 min_slice_count = DIV_ROUND_UP(mode_clock,
>                                                DP_DSC_MAX_ENC_THROUGHPUT_1);
>  
> +       if (mode_clock >= ((i915->display.cdclk.max_cdclk_freq * 85) / 100))
> +               min_slice_count = max(min_slice_count, (u8)2);
> +
>         max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd);
>         if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) {
>                 drm_dbg_kms(&i915->drm,
> @@ -1597,16 +1606,8 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
>          * is greater than the maximum Cdclock and if slice count is even
>          * then we need to use 2 VDSC instances.
>          */
> -       if (adjusted_mode->crtc_clock > dev_priv->display.cdclk.max_cdclk_freq ||
> -           pipe_config->bigjoiner_pipes) {
> -               if (pipe_config->dsc.slice_count > 1) {
> -                       pipe_config->dsc.dsc_split = true;
> -               } else {
> -                       drm_dbg_kms(&dev_priv->drm,
> -                                   "Cannot split stream to use 2 VDSC instances\n");
> -                       return -EINVAL;
> -               }
> -       }
> +       if (pipe_config->bigjoiner_pipes || pipe_config->dsc.slice_count > 1)
> +               pipe_config->dsc.dsc_split = true;
>  
>         ret = intel_dp_dsc_compute_params(&dig_port->base, pipe_config);
>         if (ret < 0) {
Sharma, Swati2 March 6, 2023, 1:17 p.m. UTC | #2
On 06-Mar-23 1:34 PM, Stanislav Lisovskiy wrote:
> We have currently the issue with some BPPs when using DSC
 >> Nitpick: We currently have an issue with some BPPs when using DSC.

> According the HW team the reason is that single VDSC engine instance,
 >> Nitpick: According "to" the ..

> has some BW limitations which have to be accounted, so whenever
> we approach around 90% of the CDCLK, second VDSC engine have to be
> used. Also that always means using 2 slices, however in our current code
> amount of slices is calculated for some reason independently of
> whether we need to enable 2nd VDSC engine or not, thus leading to
> some logical issues, when according to pixel clock needs we need to enable
> 2nd VDSC engine, however as we calculated previously that we can use
> only single slice, we can't do that and fail.
> So we need to fix that, so that amount of VDSC engines enabled should depend
> on amount of slices and amount of slices should also depend on BW requirements.
> Lastly we didn't have BPP limitation for ADLP/MTL/DG2 implemented which says
> that DSC output BPP's can only be chosen within range of 8 to 27(BSpec 49259).
> This all applied together allows to fix existing FIFO underruns, which we
> have in many DSC tests.
> 
> BSpec: 49259
> HSDES: 18027167222

Also, please add closes 
https://gitlab.freedesktop.org/drm/intel/-/issues/8231
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> ---
>   drivers/gpu/drm/i915/display/intel_dp.c | 21 +++++++++++----------
>   1 file changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> index aee93b0d810e..e3680ae95b83 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -687,6 +687,12 @@ u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 p
>   	/* From XE_LPD onwards we support from bpc upto uncompressed bpp-1 BPPs */
>   	if (DISPLAY_VER(i915) >= 13) {
>   		bits_per_pixel = min(bits_per_pixel, pipe_bpp - 1);
> +
> +		/* According to BSpec, 27 is the max DSC output bpp */
> +		bits_per_pixel = min(bits_per_pixel, (u32)27);
> +
> +		/* According to BSpec, 8 is the min DSC output bpp */
> +		bits_per_pixel = max(bits_per_pixel, (u32)8);
>   	} else {
>   		/* Find the nearest match in the array of known BPPs from VESA */
>   		for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) {
> @@ -771,6 +777,9 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
>   		min_slice_count = DIV_ROUND_UP(mode_clock,
>   					       DP_DSC_MAX_ENC_THROUGHPUT_1);
>   
> +	if (mode_clock >= ((i915->display.cdclk.max_cdclk_freq * 85) / 100))
> +		min_slice_count = max(min_slice_count, (u8)2);
> +
>   	max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd);
>   	if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) {
>   		drm_dbg_kms(&i915->drm,
> @@ -1597,16 +1606,8 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
>   	 * is greater than the maximum Cdclock and if slice count is even
>   	 * then we need to use 2 VDSC instances.
>   	 */
> -	if (adjusted_mode->crtc_clock > dev_priv->display.cdclk.max_cdclk_freq ||
> -	    pipe_config->bigjoiner_pipes) {
> -		if (pipe_config->dsc.slice_count > 1) {
> -			pipe_config->dsc.dsc_split = true;
> -		} else {
> -			drm_dbg_kms(&dev_priv->drm,
> -				    "Cannot split stream to use 2 VDSC instances\n");
> -			return -EINVAL;
> -		}
> -	}
> +	if (pipe_config->bigjoiner_pipes || pipe_config->dsc.slice_count > 1)
> +		pipe_config->dsc.dsc_split = true;
>   
>   	ret = intel_dp_dsc_compute_params(&dig_port->base, pipe_config);
>   	if (ret < 0) {
Jani Nikula March 6, 2023, 1:40 p.m. UTC | #3
On Mon, 06 Mar 2023, Stanislav Lisovskiy <stanislav.lisovskiy@intel.com> wrote:
> We have currently the issue with some BPPs when using DSC.
> According the HW team the reason is that single VDSC engine instance,
> has some BW limitations which have to be accounted, so whenever
> we approach around 90% of the CDCLK, second VDSC engine have to be
> used. Also that always means using 2 slices, however in our current code
> amount of slices is calculated for some reason independently of
> whether we need to enable 2nd VDSC engine or not, thus leading to
> some logical issues, when according to pixel clock needs we need to enable
> 2nd VDSC engine, however as we calculated previously that we can use
> only single slice, we can't do that and fail.
> So we need to fix that, so that amount of VDSC engines enabled should depend
> on amount of slices and amount of slices should also depend on BW requirements.
> Lastly we didn't have BPP limitation for ADLP/MTL/DG2 implemented which says
> that DSC output BPP's can only be chosen within range of 8 to 27(BSpec 49259).
> This all applied together allows to fix existing FIFO underruns, which we
> have in many DSC tests.
>
> BSpec: 49259
> HSDES: 18027167222
>
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_dp.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> index aee93b0d810e..e3680ae95b83 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -687,6 +687,12 @@ u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 p
>  	/* From XE_LPD onwards we support from bpc upto uncompressed bpp-1 BPPs */
>  	if (DISPLAY_VER(i915) >= 13) {
>  		bits_per_pixel = min(bits_per_pixel, pipe_bpp - 1);
> +
> +		/* According to BSpec, 27 is the max DSC output bpp */
> +		bits_per_pixel = min(bits_per_pixel, (u32)27);
> +
> +		/* According to BSpec, 8 is the min DSC output bpp */
> +		bits_per_pixel = max(bits_per_pixel, (u32)8);

Please use clamp() or clamp_t() for ranges. Avoid casting the
params. The _t variants are for handling that.

>  	} else {
>  		/* Find the nearest match in the array of known BPPs from VESA */
>  		for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) {
> @@ -771,6 +777,9 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
>  		min_slice_count = DIV_ROUND_UP(mode_clock,
>  					       DP_DSC_MAX_ENC_THROUGHPUT_1);
>  
> +	if (mode_clock >= ((i915->display.cdclk.max_cdclk_freq * 85) / 100))
> +		min_slice_count = max(min_slice_count, (u8)2);
> +
>  	max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd);
>  	if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) {
>  		drm_dbg_kms(&i915->drm,
> @@ -1597,16 +1606,8 @@ int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
>  	 * is greater than the maximum Cdclock and if slice count is even
>  	 * then we need to use 2 VDSC instances.
>  	 */
> -	if (adjusted_mode->crtc_clock > dev_priv->display.cdclk.max_cdclk_freq ||
> -	    pipe_config->bigjoiner_pipes) {
> -		if (pipe_config->dsc.slice_count > 1) {
> -			pipe_config->dsc.dsc_split = true;
> -		} else {
> -			drm_dbg_kms(&dev_priv->drm,
> -				    "Cannot split stream to use 2 VDSC instances\n");
> -			return -EINVAL;
> -		}
> -	}
> +	if (pipe_config->bigjoiner_pipes || pipe_config->dsc.slice_count > 1)
> +		pipe_config->dsc.dsc_split = true;
>  
>  	ret = intel_dp_dsc_compute_params(&dig_port->base, pipe_config);
>  	if (ret < 0) {
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index aee93b0d810e..e3680ae95b83 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -687,6 +687,12 @@  u32 intel_dp_dsc_nearest_valid_bpp(struct drm_i915_private *i915, u32 bpp, u32 p
 	/* From XE_LPD onwards we support from bpc upto uncompressed bpp-1 BPPs */
 	if (DISPLAY_VER(i915) >= 13) {
 		bits_per_pixel = min(bits_per_pixel, pipe_bpp - 1);
+
+		/* According to BSpec, 27 is the max DSC output bpp */
+		bits_per_pixel = min(bits_per_pixel, (u32)27);
+
+		/* According to BSpec, 8 is the min DSC output bpp */
+		bits_per_pixel = max(bits_per_pixel, (u32)8);
 	} else {
 		/* Find the nearest match in the array of known BPPs from VESA */
 		for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) {
@@ -771,6 +777,9 @@  u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
 		min_slice_count = DIV_ROUND_UP(mode_clock,
 					       DP_DSC_MAX_ENC_THROUGHPUT_1);
 
+	if (mode_clock >= ((i915->display.cdclk.max_cdclk_freq * 85) / 100))
+		min_slice_count = max(min_slice_count, (u8)2);
+
 	max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd);
 	if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) {
 		drm_dbg_kms(&i915->drm,
@@ -1597,16 +1606,8 @@  int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
 	 * is greater than the maximum Cdclock and if slice count is even
 	 * then we need to use 2 VDSC instances.
 	 */
-	if (adjusted_mode->crtc_clock > dev_priv->display.cdclk.max_cdclk_freq ||
-	    pipe_config->bigjoiner_pipes) {
-		if (pipe_config->dsc.slice_count > 1) {
-			pipe_config->dsc.dsc_split = true;
-		} else {
-			drm_dbg_kms(&dev_priv->drm,
-				    "Cannot split stream to use 2 VDSC instances\n");
-			return -EINVAL;
-		}
-	}
+	if (pipe_config->bigjoiner_pipes || pipe_config->dsc.slice_count > 1)
+		pipe_config->dsc.dsc_split = true;
 
 	ret = intel_dp_dsc_compute_params(&dig_port->base, pipe_config);
 	if (ret < 0) {