Message ID | 20221128101922.217217-8-ankit.k.nautiyal@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add DSC fractional bpp support | expand |
On Mon, Nov 28, 2022 at 03:49:18PM +0530, Ankit Nautiyal wrote: > MTL+ supports fractional compressed bits_per_pixel, with precision of > 1/16. This compressed bpp is stored in U6.4 format. > Accommodate the precision during calculation of transfer unit data > for hblank_early calculation. > > Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com> > --- > drivers/gpu/drm/i915/display/intel_audio.c | 12 ++++++------ > 1 file changed, 6 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c > index f63d5824aca2..4797040a6362 100644 > --- a/drivers/gpu/drm/i915/display/intel_audio.c > +++ b/drivers/gpu/drm/i915/display/intel_audio.c > @@ -510,14 +510,14 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, > unsigned int link_clks_available, link_clks_required; > unsigned int tu_data, tu_line, link_clks_active; > unsigned int h_active, h_total, hblank_delta, pixel_clk; > - unsigned int fec_coeff, cdclk, vdsc_bpp; > + unsigned int fec_coeff, cdclk, vdsc_bppx16; > unsigned int link_clk, lanes; > unsigned int hblank_rise; > > h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; > h_total = crtc_state->hw.adjusted_mode.crtc_htotal; > pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock; > - vdsc_bpp = dsc_integral_compressed_bpp(crtc_state->dsc.compressed_bpp); > + vdsc_bppx16 = crtc_state->dsc.compressed_bpp; > cdclk = i915->display.cdclk.hw.cdclk; > /* fec= 0.972261, using rounding multiplier of 1000000 */ > fec_coeff = 972261; > @@ -525,10 +525,10 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, > lanes = crtc_state->lane_count; > > drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :" > - "lanes = %u vdsc_bpp = %u cdclk = %u\n", > - h_active, link_clk, lanes, vdsc_bpp, cdclk); > + "lanes = %u vdsc_bppx16 = %u cdclk = %u\n", > + h_active, link_clk, lanes, vdsc_bppx16, cdclk); > > - if (WARN_ON(!link_clk || !pixel_clk || !lanes || 
!vdsc_bpp || !cdclk)) > + if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk)) > return 0; > > link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28; > @@ -540,7 +540,7 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, > hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk), > mul_u32_u32(link_clk, cdclk)); > > - tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000), > + tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 16 * 1000000), I think it should be: tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 1000000), mul_u32_u32(link_clk * lanes * 16, fec_coeff)); i.e you need to divide by 16 but not multiply, because vdsc_bppx16 already stores vdsc_bpp multiplied by 16, which is visible from the logs, during testing it was for example 384 for bpp 24, so no point in multiplying it once again. Stan > mul_u32_u32(link_clk * lanes, fec_coeff)); > tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff), > mul_u32_u32(64 * pixel_clk, 1000000)); > -- > 2.25.1 >
On 12/5/2022 1:05 PM, Lisovskiy, Stanislav wrote: > On Mon, Nov 28, 2022 at 03:49:18PM +0530, Ankit Nautiyal wrote: >> MTL+ supports fractional compressed bits_per_pixel, with precision of >> 1/16. This compressed bpp is stored in U6.4 format. >> Accommodate the precision during calculation of transfer unit data >> for hblank_early calculation. >> >> Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com> >> --- >> drivers/gpu/drm/i915/display/intel_audio.c | 12 ++++++------ >> 1 file changed, 6 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c >> index f63d5824aca2..4797040a6362 100644 >> --- a/drivers/gpu/drm/i915/display/intel_audio.c >> +++ b/drivers/gpu/drm/i915/display/intel_audio.c >> @@ -510,14 +510,14 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, >> unsigned int link_clks_available, link_clks_required; >> unsigned int tu_data, tu_line, link_clks_active; >> unsigned int h_active, h_total, hblank_delta, pixel_clk; >> - unsigned int fec_coeff, cdclk, vdsc_bpp; >> + unsigned int fec_coeff, cdclk, vdsc_bppx16; >> unsigned int link_clk, lanes; >> unsigned int hblank_rise; >> >> h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; >> h_total = crtc_state->hw.adjusted_mode.crtc_htotal; >> pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock; >> - vdsc_bpp = dsc_integral_compressed_bpp(crtc_state->dsc.compressed_bpp); >> + vdsc_bppx16 = crtc_state->dsc.compressed_bpp; >> cdclk = i915->display.cdclk.hw.cdclk; >> /* fec= 0.972261, using rounding multiplier of 1000000 */ >> fec_coeff = 972261; >> @@ -525,10 +525,10 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, >> lanes = crtc_state->lane_count; >> >> drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :" >> - "lanes = %u vdsc_bpp = %u cdclk = %u\n", >> - h_active, link_clk, lanes, vdsc_bpp, cdclk); >> + "lanes = %u vdsc_bppx16 = %u cdclk = %u\n", >> + h_active, 
link_clk, lanes, vdsc_bppx16, cdclk); >> >> - if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk)) >> + if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk)) >> return 0; >> >> link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28; >> @@ -540,7 +540,7 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, >> hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk), >> mul_u32_u32(link_clk, cdclk)); >> >> - tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000), >> + tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 16 * 1000000), > I think it should be: > > tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 1000000), > mul_u32_u32(link_clk * lanes * 16, fec_coeff)); > > i.e you need to divide by 16 but not multiply, because vdsc_bppx16 already > stores vdsc_bpp multiplied by 16, which is visible from the logs, > during testing it was for example 384 for bpp 24, so no point in multiplying > it once again. > > Stan You are right. This is a mistake. Intention was to multiply the denominator by 16, but I goofed up. Thanks for catching this. Will fix this in the next version of the patch. Regards, Ankit >> mul_u32_u32(link_clk * lanes, fec_coeff)); >> tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff), >> mul_u32_u32(64 * pixel_clk, 1000000)); >> -- >> 2.25.1 >>
diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index f63d5824aca2..4797040a6362 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -510,14 +510,14 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, unsigned int link_clks_available, link_clks_required; unsigned int tu_data, tu_line, link_clks_active; unsigned int h_active, h_total, hblank_delta, pixel_clk; - unsigned int fec_coeff, cdclk, vdsc_bpp; + unsigned int fec_coeff, cdclk, vdsc_bppx16; unsigned int link_clk, lanes; unsigned int hblank_rise; h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; h_total = crtc_state->hw.adjusted_mode.crtc_htotal; pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock; - vdsc_bpp = dsc_integral_compressed_bpp(crtc_state->dsc.compressed_bpp); + vdsc_bppx16 = crtc_state->dsc.compressed_bpp; cdclk = i915->display.cdclk.hw.cdclk; /* fec= 0.972261, using rounding multiplier of 1000000 */ fec_coeff = 972261; @@ -525,10 +525,10 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, lanes = crtc_state->lane_count; drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :" - "lanes = %u vdsc_bpp = %u cdclk = %u\n", - h_active, link_clk, lanes, vdsc_bpp, cdclk); + "lanes = %u vdsc_bppx16 = %u cdclk = %u\n", + h_active, link_clk, lanes, vdsc_bppx16, cdclk); - if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk)) + if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bppx16 || !cdclk)) return 0; link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28; @@ -540,7 +540,7 @@ static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk), mul_u32_u32(link_clk, cdclk)); - tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000), + tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bppx16 * 8, 16 * 1000000), 
mul_u32_u32(link_clk * lanes, fec_coeff)); tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff), mul_u32_u32(64 * pixel_clk, 1000000));
MTL+ supports fractional compressed bits_per_pixel, with a precision of 1/16. This compressed bpp is stored in U6.4 format. Accommodate this precision when calculating the transfer unit data for the hblank_early calculation.

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
---
 drivers/gpu/drm/i915/display/intel_audio.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)