diff mbox series

[07/11] drm/i915/audio: Consider fractional vdsc bpp while computing tu_data

Message ID 20221128101922.217217-8-ankit.k.nautiyal@intel.com (mailing list archive)
State New, archived
Headers show
Series Add DSC fractional bpp support | expand

Commit Message

Ankit Nautiyal Nov. 28, 2022, 10:19 a.m. UTC
MTL+ supports fractional compressed bits_per_pixel, with precision of
1/16. This compressed bpp is stored in U6.4 format.
Accommodate the precision during calculation of transfer unit data
for hblank_early calculation.

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
---
 drivers/gpu/drm/i915/display/intel_audio.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Comments

Stanislav Lisovskiy Dec. 5, 2022, 7:35 a.m. UTC | #1
On Mon, Nov 28, 2022 at 03:49:18PM +0530, Ankit Nautiyal wrote:
> MTL+ supports fractional compressed bits_per_pixel, with precision of
> 1/16. This compressed bpp is stored in U6.4 format.
> Accommodate the precision during calculation of transfer unit data
> for hblank_early calculation.
> 
> Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_audio.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
> index f63d5824aca2..4797040a6362 100644
> --- a/drivers/gpu/drm/i915/display/intel_audio.c
> +++ b/drivers/gpu/drm/i915/display/intel_audio.c
> @@ -510,14 +510,14 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
>  	unsigned int link_clks_available, link_clks_required;
>  	unsigned int tu_data, tu_line, link_clks_active;
>  	unsigned int h_active, h_total, hblank_delta, pixel_clk;
> -	unsigned int fec_coeff, cdclk, vdsc_bpp;
> +	unsigned int fec_coeff, cdclk, vdsc_bppx16;
>  	unsigned int link_clk, lanes;
>  	unsigned int hblank_rise;
>  
>  	h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay;
>  	h_total = crtc_state->hw.adjusted_mode.crtc_htotal;
>  	pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock;
> -	vdsc_bpp = dsc_integral_compressed_bpp(crtc_state->dsc.compressed_bpp);
> +	vdsc_bppx16 = crtc_state->dsc.compressed_bpp;
>  	cdclk = i915->display.cdclk.hw.cdclk;
>  	/* fec= 0.972261, using rounding multiplier of 1000000 */
>  	fec_coeff = 972261;
> @@ -525,10 +525,10 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
>  	lanes = crtc_state->lane_count;
>  
>  	drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :"
> -		    "lanes = %u vdsc_bpp = %u cdclk = %u\n",
> -		    h_active, link_clk, lanes, vdsc_bpp, cdclk);
> +		    "lanes = %u vdsc_bppx16 = %u cdclk = %u\n",
> +		    h_active, link_clk, lanes, vdsc_bppx16, cdclk);
>  
> -	if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk))
> +	if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk))
>  		return 0;
>  
>  	link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28;
> @@ -540,7 +540,7 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
>  		hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk),
>  						  mul_u32_u32(link_clk, cdclk));
>  
> -	tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000),
> +	tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 16 * 1000000),

I think it should be:

tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 1000000),
                    mul_u32_u32(link_clk * lanes * 16, fec_coeff));

i.e., you need to divide by 16 rather than multiply, because vdsc_bppx16 already
stores vdsc_bpp multiplied by 16. This is visible in the logs: during testing it
was, for example, 384 for a bpp of 24, so there is no point in multiplying it
once again.

Stan

>  			    mul_u32_u32(link_clk * lanes, fec_coeff));
>  	tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff),
>  			    mul_u32_u32(64 * pixel_clk, 1000000));
> -- 
> 2.25.1
>
Ankit Nautiyal Dec. 6, 2022, 10:19 a.m. UTC | #2
On 12/5/2022 1:05 PM, Lisovskiy, Stanislav wrote:
> On Mon, Nov 28, 2022 at 03:49:18PM +0530, Ankit Nautiyal wrote:
>> MTL+ supports fractional compressed bits_per_pixel, with precision of
>> 1/16. This compressed bpp is stored in U6.4 format.
>> Accommodate the precision during calculation of transfer unit data
>> for hblank_early calculation.
>>
>> Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
>> ---
>>   drivers/gpu/drm/i915/display/intel_audio.c | 12 ++++++------
>>   1 file changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
>> index f63d5824aca2..4797040a6362 100644
>> --- a/drivers/gpu/drm/i915/display/intel_audio.c
>> +++ b/drivers/gpu/drm/i915/display/intel_audio.c
>> @@ -510,14 +510,14 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
>>   	unsigned int link_clks_available, link_clks_required;
>>   	unsigned int tu_data, tu_line, link_clks_active;
>>   	unsigned int h_active, h_total, hblank_delta, pixel_clk;
>> -	unsigned int fec_coeff, cdclk, vdsc_bpp;
>> +	unsigned int fec_coeff, cdclk, vdsc_bppx16;
>>   	unsigned int link_clk, lanes;
>>   	unsigned int hblank_rise;
>>   
>>   	h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay;
>>   	h_total = crtc_state->hw.adjusted_mode.crtc_htotal;
>>   	pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock;
>> -	vdsc_bpp = dsc_integral_compressed_bpp(crtc_state->dsc.compressed_bpp);
>> +	vdsc_bppx16 = crtc_state->dsc.compressed_bpp;
>>   	cdclk = i915->display.cdclk.hw.cdclk;
>>   	/* fec= 0.972261, using rounding multiplier of 1000000 */
>>   	fec_coeff = 972261;
>> @@ -525,10 +525,10 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
>>   	lanes = crtc_state->lane_count;
>>   
>>   	drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :"
>> -		    "lanes = %u vdsc_bpp = %u cdclk = %u\n",
>> -		    h_active, link_clk, lanes, vdsc_bpp, cdclk);
>> +		    "lanes = %u vdsc_bppx16 = %u cdclk = %u\n",
>> +		    h_active, link_clk, lanes, vdsc_bppx16, cdclk);
>>   
>> -	if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk))
>> +	if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk))
>>   		return 0;
>>   
>>   	link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28;
>> @@ -540,7 +540,7 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
>>   		hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk),
>>   						  mul_u32_u32(link_clk, cdclk));
>>   
>> -	tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000),
>> +	tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 16 * 1000000),
> I think it should be:
>
> tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 1000000),
>                      mul_u32_u32(link_clk * lanes * 16, fec_coeff));
>
> i.e., you need to divide by 16 rather than multiply, because vdsc_bppx16 already
> stores vdsc_bpp multiplied by 16. This is visible in the logs: during testing it
> was, for example, 384 for a bpp of 24, so there is no point in multiplying it
> once again.
>
> Stan

You are right. This is a mistake. The intention was to multiply the
denominator by 16, but I goofed up.

Thanks for catching this. Will fix this in the next version of the patch.


Regards,

Ankit


>>   			    mul_u32_u32(link_clk * lanes, fec_coeff));
>>   	tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff),
>>   			    mul_u32_u32(64 * pixel_clk, 1000000));
>> -- 
>> 2.25.1
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index f63d5824aca2..4797040a6362 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -510,14 +510,14 @@  static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
 	unsigned int link_clks_available, link_clks_required;
 	unsigned int tu_data, tu_line, link_clks_active;
 	unsigned int h_active, h_total, hblank_delta, pixel_clk;
-	unsigned int fec_coeff, cdclk, vdsc_bpp;
+	unsigned int fec_coeff, cdclk, vdsc_bppx16;
 	unsigned int link_clk, lanes;
 	unsigned int hblank_rise;
 
 	h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay;
 	h_total = crtc_state->hw.adjusted_mode.crtc_htotal;
 	pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock;
-	vdsc_bpp = dsc_integral_compressed_bpp(crtc_state->dsc.compressed_bpp);
+	vdsc_bppx16 = crtc_state->dsc.compressed_bpp;
 	cdclk = i915->display.cdclk.hw.cdclk;
 	/* fec= 0.972261, using rounding multiplier of 1000000 */
 	fec_coeff = 972261;
@@ -525,10 +525,10 @@  static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
 	lanes = crtc_state->lane_count;
 
 	drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :"
-		    "lanes = %u vdsc_bpp = %u cdclk = %u\n",
-		    h_active, link_clk, lanes, vdsc_bpp, cdclk);
+		    "lanes = %u vdsc_bppx16 = %u cdclk = %u\n",
+		    h_active, link_clk, lanes, vdsc_bppx16, cdclk);
 
-	if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk))
+	if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk))
 		return 0;
 
 	link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28;
@@ -540,7 +540,7 @@  static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
 		hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk),
 						  mul_u32_u32(link_clk, cdclk));
 
-	tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000),
+	tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 16 * 1000000),
 			    mul_u32_u32(link_clk * lanes, fec_coeff));
 	tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff),
 			    mul_u32_u32(64 * pixel_clk, 1000000));