diff mbox series

[07/19] drm/i915/intel_cdclk: Add vdsc with bigjoiner constraints on min_cdlck

Message ID 20230713103346.1163315-8-ankit.k.nautiyal@intel.com (mailing list archive)
State New, archived
Headers show
Series DSC misc fixes | expand

Commit Message

Ankit Nautiyal July 13, 2023, 10:33 a.m. UTC
As per Bsepc:49259, Bigjoiner BW check puts restriction on the
compressed bpp for a given CDCLK, pixelclock in cases where
Bigjoiner + DSC are used.

Currently compressed bpp is computed first, and it is ensured that
the bpp will work at least with the max CDCLK freq.

Since the CDCLK is computed later, lets account for Bigjoiner BW
check while calculating Min CDCLK.

v2: Use pixel clock in the bw calculations. (Ville)

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 61 +++++++++++++++++-----
 1 file changed, 47 insertions(+), 14 deletions(-)

Comments

Stanislav Lisovskiy July 20, 2023, 9:24 a.m. UTC | #1
On Thu, Jul 13, 2023 at 04:03:34PM +0530, Ankit Nautiyal wrote:
> As per Bsepc:49259, Bigjoiner BW check puts restriction on the
> compressed bpp for a given CDCLK, pixelclock in cases where
> Bigjoiner + DSC are used.
> 
> Currently compressed bpp is computed first, and it is ensured that
> the bpp will work at least with the max CDCLK freq.
> 
> Since the CDCLK is computed later, lets account for Bigjoiner BW
> check while calculating Min CDCLK.
> 
> v2: Use pixel clock in the bw calculations. (Ville)
> 
> Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_cdclk.c | 61 +++++++++++++++++-----
>  1 file changed, 47 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
> index 701909966545..788dba576294 100644
> --- a/drivers/gpu/drm/i915/display/intel_cdclk.c
> +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
> @@ -2533,6 +2533,51 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state)
>  	return min_cdclk;
>  }
>  
> +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
> +	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
> +	int num_vdsc_instances = intel_dsc_get_num_vdsc_instances(crtc_state);
> +	int min_cdclk = 0;
> +
> +	/*
> +	 * When we decide to use only one VDSC engine, since
> +	 * each VDSC operates with 1 ppc throughput, pixel clock
> +	 * cannot be higher than the VDSC clock (cdclk)
> +	 * If there 2 VDSC engines, then pixel clock can't be higher than
> +	 * VDSC clock(cdclk) * 2 and so on.
> +	 */
> +	min_cdclk = max_t(int, min_cdclk,
> +			  DIV_ROUND_UP(crtc_state->pixel_rate, num_vdsc_instances));
> +
> +	if (crtc_state->bigjoiner_pipes) {
> +		int pixel_clock = crtc_state->hw.adjusted_mode.clock;
> +
> +		/*
> +		 * According to Bigjoiner bw check:
> +		 * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock
> +		 *
> +		 * We have already computed compressed_bpp, so now compute the min CDCLK that
> +		 * is required to support this compressed_bpp.
> +		 *
> +		 * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits)
> +		 *
> +		 * Since PPC = 2 with bigjoiner
> +		 * => CDCLK >= compressed_bpp * Pixel clock  / 2 * Bigjoiner Interface bits
> +		 *
> +		 * #TODO Bspec mentions to account for FEC overhead while using pixel clock.
> +		 * Check if we need to use FEC overhead in the above calculations.

There is already some function used in intel_dp.c:

intel_dp_mode_to_fec_clock(mode_clock) => Should you may be just use that one, to account FEC
overhead?

> +		 */
> +		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
> +		int min_cdclk_bj = (crtc_state->dsc.compressed_bpp * pixel_clock) /
> +				   (2 * bigjoiner_interface_bits);

I would use "num_vdsc_instances" instead of 2, since we even get those explicitly.

> +
> +		min_cdclk = max(min_cdclk, min_cdclk_bj);
> +	}
> +
> +	return min_cdclk;
> +}
> +
>  int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>  {
>  	struct drm_i915_private *dev_priv =
> @@ -2604,20 +2649,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>  	/* Account for additional needs from the planes */
>  	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
>  
> -	/*
> -	 * When we decide to use only one VDSC engine, since
> -	 * each VDSC operates with 1 ppc throughput, pixel clock
> -	 * cannot be higher than the VDSC clock (cdclk)
> -	 * If there 2 VDSC engines, then pixel clock can't be higher than
> -	 * VDSC clock(cdclk) * 2 and so on.
> -	 */
> -	if (crtc_state->dsc.compression_enable) {
> -		int num_vdsc_instances = intel_dsc_get_num_vdsc_instances(crtc_state);
> -
> -		min_cdclk = max_t(int, min_cdclk,
> -				  DIV_ROUND_UP(crtc_state->pixel_rate,
> -					       num_vdsc_instances));
> -	}
> +	if (crtc_state->dsc.compression_enable)
> +		min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state));


With notes above taken care of:

Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>

>  
>  	/*
>  	 * HACK. Currently for TGL/DG2 platforms we calculate
> -- 
> 2.40.1
>
Ankit Nautiyal July 25, 2023, 6:01 a.m. UTC | #2
On 7/20/2023 2:54 PM, Lisovskiy, Stanislav wrote:
> On Thu, Jul 13, 2023 at 04:03:34PM +0530, Ankit Nautiyal wrote:
>> As per Bsepc:49259, Bigjoiner BW check puts restriction on the
>> compressed bpp for a given CDCLK, pixelclock in cases where
>> Bigjoiner + DSC are used.
>>
>> Currently compressed bpp is computed first, and it is ensured that
>> the bpp will work at least with the max CDCLK freq.
>>
>> Since the CDCLK is computed later, lets account for Bigjoiner BW
>> check while calculating Min CDCLK.
>>
>> v2: Use pixel clock in the bw calculations. (Ville)
>>
>> Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
>> ---
>>   drivers/gpu/drm/i915/display/intel_cdclk.c | 61 +++++++++++++++++-----
>>   1 file changed, 47 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
>> index 701909966545..788dba576294 100644
>> --- a/drivers/gpu/drm/i915/display/intel_cdclk.c
>> +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
>> @@ -2533,6 +2533,51 @@ static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state)
>>   	return min_cdclk;
>>   }
>>   
>> +static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
>> +{
>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
>> +	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
>> +	int num_vdsc_instances = intel_dsc_get_num_vdsc_instances(crtc_state);
>> +	int min_cdclk = 0;
>> +
>> +	/*
>> +	 * When we decide to use only one VDSC engine, since
>> +	 * each VDSC operates with 1 ppc throughput, pixel clock
>> +	 * cannot be higher than the VDSC clock (cdclk)
>> +	 * If there 2 VDSC engines, then pixel clock can't be higher than
>> +	 * VDSC clock(cdclk) * 2 and so on.
>> +	 */
>> +	min_cdclk = max_t(int, min_cdclk,
>> +			  DIV_ROUND_UP(crtc_state->pixel_rate, num_vdsc_instances));
>> +
>> +	if (crtc_state->bigjoiner_pipes) {
>> +		int pixel_clock = crtc_state->hw.adjusted_mode.clock;
>> +
>> +		/*
>> +		 * According to Bigjoiner bw check:
>> +		 * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock
>> +		 *
>> +		 * We have already computed compressed_bpp, so now compute the min CDCLK that
>> +		 * is required to support this compressed_bpp.
>> +		 *
>> +		 * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits)
>> +		 *
>> +		 * Since PPC = 2 with bigjoiner
>> +		 * => CDCLK >= compressed_bpp * Pixel clock  / 2 * Bigjoiner Interface bits
>> +		 *
>> +		 * #TODO Bspec mentions to account for FEC overhead while using pixel clock.
>> +		 * Check if we need to use FEC overhead in the above calculations.
> There is already some function used in intel_dp.c:
>
> intel_dp_mode_to_fec_clock(mode_clock) => Should you may be just use that one, to account FEC
> overhead?

Hmm right I agree, I can use that here, thanks!


>
>> +		 */
>> +		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
>> +		int min_cdclk_bj = (crtc_state->dsc.compressed_bpp * pixel_clock) /
>> +				   (2 * bigjoiner_interface_bits);
> I would use "num_vdsc_instances" instead of 2, since we even get those explicitly.

Currently for the bigjoiner case, the num_vdsc_instances returns 4 for 
the pipes. Should we have a function to have num_vdsc_instances_per_pipe?

Or perhaps one function for just getting the PPC which will be 2 for 
Display>=10 and when dsc_split is used, and 1 otherwise?


Thanks & Regards,

Ankit


>
>> +
>> +		min_cdclk = max(min_cdclk, min_cdclk_bj);
>> +	}
>> +
>> +	return min_cdclk;
>> +}
>> +
>>   int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>>   {
>>   	struct drm_i915_private *dev_priv =
>> @@ -2604,20 +2649,8 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
>>   	/* Account for additional needs from the planes */
>>   	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
>>   
>> -	/*
>> -	 * When we decide to use only one VDSC engine, since
>> -	 * each VDSC operates with 1 ppc throughput, pixel clock
>> -	 * cannot be higher than the VDSC clock (cdclk)
>> -	 * If there 2 VDSC engines, then pixel clock can't be higher than
>> -	 * VDSC clock(cdclk) * 2 and so on.
>> -	 */
>> -	if (crtc_state->dsc.compression_enable) {
>> -		int num_vdsc_instances = intel_dsc_get_num_vdsc_instances(crtc_state);
>> -
>> -		min_cdclk = max_t(int, min_cdclk,
>> -				  DIV_ROUND_UP(crtc_state->pixel_rate,
>> -					       num_vdsc_instances));
>> -	}
>> +	if (crtc_state->dsc.compression_enable)
>> +		min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state));
>
> With notes above taken care of:
>
> Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
>
>>   
>>   	/*
>>   	 * HACK. Currently for TGL/DG2 platforms we calculate
>> -- 
>> 2.40.1
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 701909966545..788dba576294 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2533,6 +2533,51 @@  static int intel_planes_min_cdclk(const struct intel_crtc_state *crtc_state)
 	return min_cdclk;
 }
 
+static int intel_vdsc_min_cdclk(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	int num_vdsc_instances = intel_dsc_get_num_vdsc_instances(crtc_state);
+	int min_cdclk = 0;
+
+	/*
+	 * When we decide to use only one VDSC engine, since
+	 * each VDSC operates with 1 ppc throughput, pixel clock
+	 * cannot be higher than the VDSC clock (cdclk)
+	 * If there 2 VDSC engines, then pixel clock can't be higher than
+	 * VDSC clock(cdclk) * 2 and so on.
+	 */
+	min_cdclk = max_t(int, min_cdclk,
+			  DIV_ROUND_UP(crtc_state->pixel_rate, num_vdsc_instances));
+
+	if (crtc_state->bigjoiner_pipes) {
+		int pixel_clock = crtc_state->hw.adjusted_mode.clock;
+
+		/*
+		 * According to Bigjoiner bw check:
+		 * compressed_bpp <= PPC * CDCLK * Big joiner Interface bits / Pixel clock
+		 *
+		 * We have already computed compressed_bpp, so now compute the min CDCLK that
+		 * is required to support this compressed_bpp.
+		 *
+		 * => CDCLK >= compressed_bpp * Pixel clock / (PPC * Bigjoiner Interface bits)
+		 *
+		 * Since PPC = 2 with bigjoiner
+		 * => CDCLK >= compressed_bpp * Pixel clock  / 2 * Bigjoiner Interface bits
+		 *
+		 * #TODO Bspec mentions to account for FEC overhead while using pixel clock.
+		 * Check if we need to use FEC overhead in the above calculations.
+		 */
+		int bigjoiner_interface_bits = DISPLAY_VER(i915) > 13 ? 36 : 24;
+		int min_cdclk_bj = (crtc_state->dsc.compressed_bpp * pixel_clock) /
+				   (2 * bigjoiner_interface_bits);
+
+		min_cdclk = max(min_cdclk, min_cdclk_bj);
+	}
+
+	return min_cdclk;
+}
+
 int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv =
@@ -2604,20 +2649,8 @@  int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 	/* Account for additional needs from the planes */
 	min_cdclk = max(intel_planes_min_cdclk(crtc_state), min_cdclk);
 
-	/*
-	 * When we decide to use only one VDSC engine, since
-	 * each VDSC operates with 1 ppc throughput, pixel clock
-	 * cannot be higher than the VDSC clock (cdclk)
-	 * If there 2 VDSC engines, then pixel clock can't be higher than
-	 * VDSC clock(cdclk) * 2 and so on.
-	 */
-	if (crtc_state->dsc.compression_enable) {
-		int num_vdsc_instances = intel_dsc_get_num_vdsc_instances(crtc_state);
-
-		min_cdclk = max_t(int, min_cdclk,
-				  DIV_ROUND_UP(crtc_state->pixel_rate,
-					       num_vdsc_instances));
-	}
+	if (crtc_state->dsc.compression_enable)
+		min_cdclk = max(min_cdclk, intel_vdsc_min_cdclk(crtc_state));
 
 	/*
 	 * HACK. Currently for TGL/DG2 platforms we calculate