diff mbox series

[v3,4/4] drm/i915/icl: Add Multi-segmented gamma support

Message ID 20190507135644.23882-5-shashank.sharma@intel.com (mailing list archive)
State New, archived
Headers show
Series Enable Multi-segmented-gamma for ICL | expand

Commit Message

Sharma, Shashank May 7, 2019, 1:56 p.m. UTC
ICL introduces a new gamma correction mode in display engine, called
multi-segmented-gamma mode. This mode allows users to program the
darker region of the gamma curve with superfine precision. An
example use case for this is HDR curves (like PQ ST-2084).

If we plot a gamma correction curve from value range between 0.0 to 1.0,
ICL's multi-segment has 3 different sections:
- superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
- fine segment: 257 values, ranges between 0 - 1/(128)
- coarse segment: 257 values, ranges between 0 - 1

This patch:
- Changes gamma LUTs size for ICL/GEN11 to 262145 entries (8 * 128 * 256,
  plus 1 entry for GCMAX), so that userspace can program with highest
  precision supported.
- Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
- Adds functions to program/detect multi-segment gamma.

V2: Addressed review comments from Ville
    - separate function for superfine and fine segments.
    - remove enum for segments.
    - reuse last entry of the LUT as gc_max value.
    - replace if() ....cond with switch...case in icl_load_luts.
    - add an entry variable, instead of 'word'

V3: Addressed review comments from Ville
    - extra newline
    - s/entry/color/
    - remove LUT size checks
    - program ilk_lut_12p4_ldw value before ilk_lut_12p4_udw
    - Change the comments in description of fine and coarse segments,
      and try to make more sense.
    - use 8 * 128 instead of 1024
    - add 1 entry in LUT for GCMAX

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
Signed-off-by: Uma Shankar <uma.shankar@intel.com>
---
 drivers/gpu/drm/i915/i915_pci.c    |   2 +-
 drivers/gpu/drm/i915/intel_color.c | 127 ++++++++++++++++++++++++++++-
 2 files changed, 124 insertions(+), 5 deletions(-)

Comments

Ville Syrjälä May 7, 2019, 2:27 p.m. UTC | #1
On Tue, May 07, 2019 at 07:26:44PM +0530, Shashank Sharma wrote:
> ICL introduces a new gamma correction mode in display engine, called
> multi-segmented-gamma mode. This mode allows users to program the
> darker region of the gamma curve with sueprfine precision. An
> example use case for this is HDR curves (like PQ ST-2084).
> 
> If we plot a gamma correction curve from value range between 0.0 to 1.0,
> ICL's multi-segment has 3 different sections:
> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
> - fine segment: 257 values, ranges between 0 - 1/(128)
> - corase segment: 257 values, ranges between 0 - 1
> 
> This patch:
> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>   so that userspace can program with highest precision supported.
> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
> - Adds functions to program/detect multi-segment gamma.
> 
> V2: Addressed review comments from Ville
>     - separate function for superfine and fine segments.
>     - remove enum for segments.
>     - reuse last entry of the LUT as gc_max value.
>     - replace if() ....cond with switch...case in icl_load_luts.
>     - add an entry variable, instead of 'word'
> 
> V3: Addressed review comments from Ville
>     - extra newline
>     - s/entry/color/
>     - remove LUT size checks
>     - program ilk_lut_12p4_ldw value before ilk_lut_12p4_udw
>     - Change the comments in description of fine and coarse segments,
>       and try to make more sense.
>     - use 8 * 128 instead of 1024
>     - add 1 entry in LUT for GCMAX
> 
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
> Signed-off-by: Uma Shankar <uma.shankar@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_pci.c    |   2 +-
>  drivers/gpu/drm/i915/intel_color.c | 127 ++++++++++++++++++++++++++++-
>  2 files changed, 124 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index ffa2ee70a03d..2f99b585d44b 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -749,7 +749,7 @@ static const struct intel_device_info intel_cannonlake_info = {
>  	GEN(11), \
>  	.ddb_size = 2048, \
>  	.has_logical_ring_elsq = 1, \
> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
>  
>  static const struct intel_device_info intel_icelake_11_info = {
>  	GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
> index 6c341bea514c..c1a9506fd069 100644
> --- a/drivers/gpu/drm/i915/intel_color.c
> +++ b/drivers/gpu/drm/i915/intel_color.c
> @@ -41,6 +41,8 @@
>  #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
>  
>  #define LEGACY_LUT_LENGTH		256
> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH		(256 * 128 * 8)

Unused.

> +
>  /*
>   * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>   * format). This macro takes the coefficient we want transformed and the
> @@ -767,6 +769,116 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
>  	}
>  }
>  
> +/* ilk+ "12.4" interpolated format (high 10 bits) */
> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
> +{
> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
> +		(color->blue >> 6);
> +}
> +
> +/* ilk+ "12.4" interpolated format (low 6 bits) */
> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
> +{
> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
> +		(color->blue & 0x3f);

Blue is missing the shift.

I'm not 100% sure if the ldw vs. udw are the right way around. The spec
has at times been inconsistent with the odd vs. even descriptions,
sometimes even contradicting itself. Also it never really defines
whether it starts counting dwords from 0 or 1, so not sure what
odd and even actually mean. Can I presume someone has checked this
on actual hardware?

> +}
> +
> +static void
> +icl_load_gcmax(const struct intel_crtc_state *crtc_state,
> +	       const struct drm_color_lut *color)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +	enum pipe pipe = crtc->pipe;
> +
> +	/* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
> +}
> +
> +static void
> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
> +	const struct drm_color_lut *lut = blob->data;
> +	enum pipe pipe = crtc->pipe;
> +	u32 i;
> +
> +	/*
> +	 * Every entry in the multi-segment LUT is corresponding to a superfine
> +	 * segment step which is 1/(8 * 128 * 256).
> +	 *
> +	 * Superfine segment has 9 entries, corresponding to values
> +	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
> +	 */
> +	I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
> +
> +	for (i = 0; i < 9; i++) {
> +		const struct drm_color_lut *entry = &lut[i];
> +
> +		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
> +			   ilk_lut_12p4_ldw(entry));
> +		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
> +			   ilk_lut_12p4_udw(entry));
> +	}
> +}
> +
> +static void
> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
> +	const struct drm_color_lut *lut = blob->data;
> +	const struct drm_color_lut *entry;
> +	enum pipe pipe = crtc->pipe;
> +	u32 i;
> +
> +	/*
> +	 *
> +	 * Program Fine segment (let's call it seg2)...
> +	 *
> +	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
> +	 * ... 256/(128*256). So in order to program fine segment of LUT we
> +	 * need to pick every 8'th entry in LUT, and program 256 indexes.
> +	 *
> +	 * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
> +	 * with seg2[0] being unused by the hardware.
> +	 */
> +	I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
> +	for (i = 1; i < 257; i++) {
> +		entry = &lut[i * 8];
> +		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
> +		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
> +	}
> +
> +	/*
> +	 * Program Coarse segment (let's call it seg3)...
> +	 *
> +	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
> +	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
> +	 * above, we need to pick every (8 * 128)th entry in LUT, and
> +	 * program 256 of those.
> +	 *
> +	 * Spec is not very clear about if entries seg3[0] and seg3[1] are
> +	 * being used or not, but we still need to program these to advance
> +	 * the index.
> +	 */
> +	for (i = 0; i < 256; i++) {
> +		entry = &lut[i * 8 * 128];
> +		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
> +		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
> +	}
> +
> +	/* The last entry in the LUT is to be programmed in GCMAX */
> +	entry = &lut[256 * 8 * 128 + 1];

The +1 shouldn't be here.

OK, mostly looks good. With the minor issues addressed this is
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> +	icl_load_gcmax(crtc_state, entry);
> +	ivb_load_lut_ext_max(crtc);
> +}
> +
>  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>  {
>  	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
> @@ -775,10 +887,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>  	if (crtc_state->base.degamma_lut)
>  		glk_load_degamma_lut(crtc_state);
>  
> -	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
> -	    GAMMA_MODE_MODE_8BIT) {
> +	switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
> +	case GAMMA_MODE_MODE_8BIT:
>  		i9xx_load_luts(crtc_state);
> -	} else {
> +		break;
> +
> +	case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
> +		icl_program_gamma_superfine_segment(crtc_state);
> +		icl_program_gamma_multi_segment(crtc_state);
> +		break;
> +
> +	default:
>  		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
>  		ivb_load_lut_ext_max(crtc);
>  	}
> @@ -1209,7 +1328,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
>  	    crtc_state_is_legacy_gamma(crtc_state))
>  		gamma_mode |= GAMMA_MODE_MODE_8BIT;
>  	else
> -		gamma_mode |= GAMMA_MODE_MODE_10BIT;
> +		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
>  
>  	return gamma_mode;
>  }
> -- 
> 2.17.1
Sharma, Shashank May 8, 2019, 1:05 p.m. UTC | #2
On 5/7/2019 7:57 PM, Ville Syrjälä wrote:
> On Tue, May 07, 2019 at 07:26:44PM +0530, Shashank Sharma wrote:
>> ICL introduces a new gamma correction mode in display engine, called
>> multi-segmented-gamma mode. This mode allows users to program the
>> darker region of the gamma curve with sueprfine precision. An
>> example use case for this is HDR curves (like PQ ST-2084).
>>
>> If we plot a gamma correction curve from value range between 0.0 to 1.0,
>> ICL's multi-segment has 3 different sections:
>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
>> - fine segment: 257 values, ranges between 0 - 1/(128)
>> - corase segment: 257 values, ranges between 0 - 1
>>
>> This patch:
>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>>    so that userspace can program with highest precision supported.
>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
>> - Adds functions to program/detect multi-segment gamma.
>>
>> V2: Addressed review comments from Ville
>>      - separate function for superfine and fine segments.
>>      - remove enum for segments.
>>      - reuse last entry of the LUT as gc_max value.
>>      - replace if() ....cond with switch...case in icl_load_luts.
>>      - add an entry variable, instead of 'word'
>>
>> V3: Addressed review comments from Ville
>>      - extra newline
>>      - s/entry/color/
>>      - remove LUT size checks
>>      - program ilk_lut_12p4_ldw value before ilk_lut_12p4_udw
>>      - Change the comments in description of fine and coarse segments,
>>        and try to make more sense.
>>      - use 8 * 128 instead of 1024
>>      - add 1 entry in LUT for GCMAX
>>
>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>
>> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
>> Signed-off-by: Uma Shankar <uma.shankar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_pci.c    |   2 +-
>>   drivers/gpu/drm/i915/intel_color.c | 127 ++++++++++++++++++++++++++++-
>>   2 files changed, 124 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index ffa2ee70a03d..2f99b585d44b 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -749,7 +749,7 @@ static const struct intel_device_info intel_cannonlake_info = {
>>   	GEN(11), \
>>   	.ddb_size = 2048, \
>>   	.has_logical_ring_elsq = 1, \
>> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
>> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
>>   
>>   static const struct intel_device_info intel_icelake_11_info = {
>>   	GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
>> index 6c341bea514c..c1a9506fd069 100644
>> --- a/drivers/gpu/drm/i915/intel_color.c
>> +++ b/drivers/gpu/drm/i915/intel_color.c
>> @@ -41,6 +41,8 @@
>>   #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
>>   
>>   #define LEGACY_LUT_LENGTH		256
>> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
> Unused.
Got it
>> +
>>   /*
>>    * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>>    * format). This macro takes the coefficient we want transformed and the
>> @@ -767,6 +769,116 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
>>   	}
>>   }
>>   
>> +/* ilk+ "12.4" interpolated format (high 10 bits) */
>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
>> +{
>> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
>> +		(color->blue >> 6);
>> +}
>> +
>> +/* ilk+ "12.4" interpolated format (low 6 bits) */
>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
>> +{
>> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
>> +		(color->blue & 0x3f);
> Blue is missing the shift.
Ok,
> I'm not 100% sure if the ldw vs. udw are the right way around. The spec
> has at times been inconsistent with the odd vs. even descriptions,
> sometimes even contradicting itself. Also it never really defines
> whether it starts counting dwords from from 0 or 1, so not sure what
> odd and even actually mean. Can I presume someone has checked this
> on actual hardware?
Well, the property was getting set properly, and the display looked ok, 
but didn't dump the values in registers. Can check it now.
>> +}
>> +
>> +static void
>> +icl_load_gcmax(const struct intel_crtc_state *crtc_state,
>> +	       const struct drm_color_lut *color)
>> +{
>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> +	enum pipe pipe = crtc->pipe;
>> +
>> +	/* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
>> +}
>> +
>> +static void
>> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
>> +{
>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> +	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
>> +	const struct drm_color_lut *lut = blob->data;
>> +	enum pipe pipe = crtc->pipe;
>> +	u32 i;
>> +
>> +	/*
>> +	 * Every entry in the multi-segment LUT is corresponding to a superfine
>> +	 * segment step which is 1/(8 * 128 * 256).
>> +	 *
>> +	 * Superfine segment has 9 entries, corresponding to values
>> +	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
>> +	 */
>> +	I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
>> +
>> +	for (i = 0; i < 9; i++) {
>> +		const struct drm_color_lut *entry = &lut[i];
>> +
>> +		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
>> +			   ilk_lut_12p4_ldw(entry));
>> +		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
>> +			   ilk_lut_12p4_udw(entry));
>> +	}
>> +}
>> +
>> +static void
>> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
>> +{
>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> +	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
>> +	const struct drm_color_lut *lut = blob->data;
>> +	const struct drm_color_lut *entry;
>> +	enum pipe pipe = crtc->pipe;
>> +	u32 i;
>> +
>> +	/*
>> +	 *
>> +	 * Program Fine segment (let's call it seg2)...
>> +	 *
>> +	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
>> +	 * ... 256/(128*256). So in order to program fine segment of LUT we
>> +	 * need to pick every 8'th entry in LUT, and program 256 indexes.
>> +	 *
>> +	 * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
>> +	 * with seg2[0] being unused by the hardware.
>> +	 */
>> +	I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
>> +	for (i = 1; i < 257; i++) {
>> +		entry = &lut[i * 8];
>> +		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
>> +		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
>> +	}
>> +
>> +	/*
>> +	 * Program Coarse segment (let's call it seg3)...
>> +	 *
>> +	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
>> +	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
>> +	 * above, we need to pick every (8 * 128)th entry in LUT, and
>> +	 * program 256 of those.
>> +	 *
>> +	 * Spec is not very clear about if entries seg3[0] and seg3[1] are
>> +	 * being used or not, but we still need to program these to advance
>> +	 * the index.
>> +	 */
>> +	for (i = 0; i < 256; i++) {
>> +		entry = &lut[i * 8 * 128];
>> +		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
>> +		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
>> +	}
>> +
>> +	/* The last entry in the LUT is to be programmed in GCMAX */
>> +	entry = &lut[256 * 8 * 128 + 1];
> The +1 shouldn't be here.
ok
> OK, mostly looks good. With the minor issues addressed this is
> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

Thanks for the review, I will publish V4 with comments addressed.

- Shashank

>> +	icl_load_gcmax(crtc_state, entry);
>> +	ivb_load_lut_ext_max(crtc);
>> +}
>> +
>>   static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>>   {
>>   	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
>> @@ -775,10 +887,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>>   	if (crtc_state->base.degamma_lut)
>>   		glk_load_degamma_lut(crtc_state);
>>   
>> -	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
>> -	    GAMMA_MODE_MODE_8BIT) {
>> +	switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
>> +	case GAMMA_MODE_MODE_8BIT:
>>   		i9xx_load_luts(crtc_state);
>> -	} else {
>> +		break;
>> +
>> +	case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
>> +		icl_program_gamma_superfine_segment(crtc_state);
>> +		icl_program_gamma_multi_segment(crtc_state);
>> +		break;
>> +
>> +	default:
>>   		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
>>   		ivb_load_lut_ext_max(crtc);
>>   	}
>> @@ -1209,7 +1328,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
>>   	    crtc_state_is_legacy_gamma(crtc_state))
>>   		gamma_mode |= GAMMA_MODE_MODE_8BIT;
>>   	else
>> -		gamma_mode |= GAMMA_MODE_MODE_10BIT;
>> +		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
>>   
>>   	return gamma_mode;
>>   }
>> -- 
>> 2.17.1
Sharma, Shashank May 8, 2019, 8:03 p.m. UTC | #3
We (Me and Uma) confirmed the ICL register programming sequence, by 
dumping the registers.

The correct sequence should be:

ilk_lut_12p4_udw

ilk_lut_12p4_ldw

We passed maximum value LUT (1.0) and saw only blue output, if 
programmed in opposite sequence.

Programming in above mentioned sequence gives a proper white output.

Regards
Shashank
On 5/8/2019 6:35 PM, Sharma, Shashank wrote:
> On 5/7/2019 7:57 PM, Ville Syrjälä wrote:
>> On Tue, May 07, 2019 at 07:26:44PM +0530, Shashank Sharma wrote:
>>> ICL introduces a new gamma correction mode in display engine, called
>>> multi-segmented-gamma mode. This mode allows users to program the
>>> darker region of the gamma curve with sueprfine precision. An
>>> example use case for this is HDR curves (like PQ ST-2084).
>>>
>>> If we plot a gamma correction curve from value range between 0.0 to 
>>> 1.0,
>>> ICL's multi-segment has 3 different sections:
>>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
>>> - fine segment: 257 values, ranges between 0 - 1/(128)
>>> - corase segment: 257 values, ranges between 0 - 1
>>>
>>> This patch:
>>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 
>>> 256),
>>>    so that userspace can program with highest precision supported.
>>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma 
>>> mode.
>>> - Adds functions to program/detect multi-segment gamma.
>>>
>>> V2: Addressed review comments from Ville
>>>      - separate function for superfine and fine segments.
>>>      - remove enum for segments.
>>>      - reuse last entry of the LUT as gc_max value.
>>>      - replace if() ....cond with switch...case in icl_load_luts.
>>>      - add an entry variable, instead of 'word'
>>>
>>> V3: Addressed review comments from Ville
>>>      - extra newline
>>>      - s/entry/color/
>>>      - remove LUT size checks
>>>      - program ilk_lut_12p4_ldw value before ilk_lut_12p4_udw
>>>      - Change the comments in description of fine and coarse segments,
>>>        and try to make more sense.
>>>      - use 8 * 128 instead of 1024
>>>      - add 1 entry in LUT for GCMAX
>>>
>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>
>>> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
>>> Signed-off-by: Uma Shankar <uma.shankar@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_pci.c    |   2 +-
>>>   drivers/gpu/drm/i915/intel_color.c | 127 
>>> ++++++++++++++++++++++++++++-
>>>   2 files changed, 124 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index ffa2ee70a03d..2f99b585d44b 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -749,7 +749,7 @@ static const struct intel_device_info 
>>> intel_cannonlake_info = {
>>>       GEN(11), \
>>>       .ddb_size = 2048, \
>>>       .has_logical_ring_elsq = 1, \
>>> -    .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
>>> +    .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
>>>     static const struct intel_device_info intel_icelake_11_info = {
>>>       GEN11_FEATURES,
>>> diff --git a/drivers/gpu/drm/i915/intel_color.c 
>>> b/drivers/gpu/drm/i915/intel_color.c
>>> index 6c341bea514c..c1a9506fd069 100644
>>> --- a/drivers/gpu/drm/i915/intel_color.c
>>> +++ b/drivers/gpu/drm/i915/intel_color.c
>>> @@ -41,6 +41,8 @@
>>>   #define CTM_COEFF_ABS(coeff)        ((coeff) & (CTM_COEFF_SIGN - 1))
>>>     #define LEGACY_LUT_LENGTH        256
>>> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH        (256 * 128 * 8)
>> Unused.
> Got it
>>> +
>>>   /*
>>>    * Extract the CSC coefficient from a CTM coefficient (in U32.32 
>>> fixed point
>>>    * format). This macro takes the coefficient we want transformed 
>>> and the
>>> @@ -767,6 +769,116 @@ static void glk_load_luts(const struct 
>>> intel_crtc_state *crtc_state)
>>>       }
>>>   }
>>>   +/* ilk+ "12.4" interpolated format (high 10 bits) */
>>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
>>> +{
>>> +    return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
>>> +        (color->blue >> 6);
>>> +}
>>> +
>>> +/* ilk+ "12.4" interpolated format (low 6 bits) */
>>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
>>> +{
>>> +    return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
>>> +        (color->blue & 0x3f);
>> Blue is missing the shift.
> Ok,
>> I'm not 100% sure if the ldw vs. udw are the right way around. The spec
>> has at times been inconsistent with the odd vs. even descriptions,
>> sometimes even contradicting itself. Also it never really defines
>> whether it starts counting dwords from from 0 or 1, so not sure what
>> odd and even actually mean. Can I presume someone has checked this
>> on actual hardware?
> Well, the property was getting set properly, and the display looked 
> ok, but dint dump the values in registers. Can check it now.
>>> +}
>>> +
>>> +static void
>>> +icl_load_gcmax(const struct intel_crtc_state *crtc_state,
>>> +           const struct drm_color_lut *color)
>>> +{
>>> +    struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>>> +    struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>>> +    enum pipe pipe = crtc->pipe;
>>> +
>>> +    /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF 
>>> max */
>>> +    I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
>>> +    I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
>>> +    I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
>>> +}
>>> +
>>> +static void
>>> +icl_program_gamma_superfine_segment(const struct intel_crtc_state 
>>> *crtc_state)
>>> +{
>>> +    struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>>> +    struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>>> +    const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
>>> +    const struct drm_color_lut *lut = blob->data;
>>> +    enum pipe pipe = crtc->pipe;
>>> +    u32 i;
>>> +
>>> +    /*
>>> +     * Every entry in the multi-segment LUT is corresponding to a 
>>> superfine
>>> +     * segment step which is 1/(8 * 128 * 256).
>>> +     *
>>> +     * Superfine segment has 9 entries, corresponding to values
>>> +     * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
>>> +     */
>>> +    I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), 
>>> PAL_PREC_AUTO_INCREMENT);
>>> +
>>> +    for (i = 0; i < 9; i++) {
>>> +        const struct drm_color_lut *entry = &lut[i];
>>> +
>>> +        I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
>>> +               ilk_lut_12p4_ldw(entry));
>>> +        I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
>>> +               ilk_lut_12p4_udw(entry));
>>> +    }
>>> +}
>>> +
>>> +static void
>>> +icl_program_gamma_multi_segment(const struct intel_crtc_state 
>>> *crtc_state)
>>> +{
>>> +    struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>>> +    struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>>> +    const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
>>> +    const struct drm_color_lut *lut = blob->data;
>>> +    const struct drm_color_lut *entry;
>>> +    enum pipe pipe = crtc->pipe;
>>> +    u32 i;
>>> +
>>> +    /*
>>> +     *
>>> +     * Program Fine segment (let's call it seg2)...
>>> +     *
>>> +     * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  
>>> 2/(128*256)
>>> +     * ... 256/(128*256). So in order to program fine segment of 
>>> LUT we
>>> +     * need to pick every 8'th entry in LUT, and program 256 indexes.
>>> +     *
>>> +     * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
>>> +     * with seg2[0] being unused by the hardware.
>>> +     */
>>> +    I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
>>> +    for (i = 1; i < 257; i++) {
>>> +        entry = &lut[i * 8];
>>> +        I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
>>> +        I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
>>> +    }
>>> +
>>> +    /*
>>> +     * Program Coarse segment (let's call it seg3)...
>>> +     *
>>> +     * Coarse segment's starts from index 0 and it's step is 1/256 
>>> ie 0,
>>> +     * 1/256, 2/256 ...256/256. As per the description of each 
>>> entry in LUT
>>> +     * above, we need to pick every (8 * 128)th entry in LUT, and
>>> +     * program 256 of those.
>>> +     *
>>> +     * Spec is not very clear about if entries seg3[0] and seg3[1] are
>>> +     * being used or not, but we still need to program these to 
>>> advance
>>> +     * the index.
>>> +     */
>>> +    for (i = 0; i < 256; i++) {
>>> +        entry = &lut[i * 8 * 128];
>>> +        I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
>>> +        I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
>>> +    }
>>> +
>>> +    /* The last entry in the LUT is to be programmed in GCMAX */
>>> +    entry = &lut[256 * 8 * 128 + 1];
>> The +1 shouldn't be here.
> ok
>> OK, mostly looks good. With the minor issues addressed this is
>> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> Thanks for the review, I will publish V4 with comments addressed.
>
> - Shashank
>
>>> +    icl_load_gcmax(crtc_state, entry);
>>> +    ivb_load_lut_ext_max(crtc);
>>> +}
>>> +
>>>   static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>>>   {
>>>       const struct drm_property_blob *gamma_lut = 
>>> crtc_state->base.gamma_lut;
>>> @@ -775,10 +887,17 @@ static void icl_load_luts(const struct 
>>> intel_crtc_state *crtc_state)
>>>       if (crtc_state->base.degamma_lut)
>>>           glk_load_degamma_lut(crtc_state);
>>>   -    if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
>>> -        GAMMA_MODE_MODE_8BIT) {
>>> +    switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
>>> +    case GAMMA_MODE_MODE_8BIT:
>>>           i9xx_load_luts(crtc_state);
>>> -    } else {
>>> +        break;
>>> +
>>> +    case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
>>> +        icl_program_gamma_superfine_segment(crtc_state);
>>> +        icl_program_gamma_multi_segment(crtc_state);
>>> +        break;
>>> +
>>> +    default:
>>>           bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
>>>           ivb_load_lut_ext_max(crtc);
>>>       }
>>> @@ -1209,7 +1328,7 @@ static u32 icl_gamma_mode(const struct 
>>> intel_crtc_state *crtc_state)
>>>           crtc_state_is_legacy_gamma(crtc_state))
>>>           gamma_mode |= GAMMA_MODE_MODE_8BIT;
>>>       else
>>> -        gamma_mode |= GAMMA_MODE_MODE_10BIT;
>>> +        gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
>>>         return gamma_mode;
>>>   }
>>> -- 
>>> 2.17.1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ffa2ee70a03d..2f99b585d44b 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -749,7 +749,7 @@  static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_logical_ring_elsq = 1, \
-	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
+	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index 6c341bea514c..c1a9506fd069 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -41,6 +41,8 @@ 
 #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
 
 #define LEGACY_LUT_LENGTH		256
+#define ICL_GAMMA_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
+
 /*
  * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
  * format). This macro takes the coefficient we want transformed and the
@@ -767,6 +769,116 @@  static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 	}
 }
 
+/* ilk+ "12.4" format, high 10 bits per channel. NOTE(review): ldw vs udw? */
+static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
+{
+	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
+		(color->blue >> 6);
+}
+
+/* ilk+ "12.4" interpolated format (low 6 bits, at the top of each field) */
+static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
+{
+	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
+		(color->blue & 0x3f) << 4;
+}
+
+static void
+icl_load_gcmax(const struct intel_crtc_state *crtc_state,
+	       const struct drm_color_lut *color)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+
+	/* FIXME: LUT entries are 16 bits, so we can program 0xFFFF at most */
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
+}
+
+static void
+icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+	const struct drm_color_lut *lut = blob->data;
+	enum pipe pipe = crtc->pipe;
+	u32 i;
+
+	/*
+	 * Each entry in the multi-segment LUT corresponds to one superfine
+	 * segment step, which is 1/(8 * 128 * 256).
+	 *
+	 * The superfine segment has 9 entries, corresponding to the values
+	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) ... 8/(8 * 128 * 256).
+	 */
+	I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+
+	for (i = 0; i < 9; i++) {
+		const struct drm_color_lut *entry = &lut[i];
+
+		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
+			   ilk_lut_12p4_ldw(entry));
+		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
+			   ilk_lut_12p4_udw(entry));
+	}
+}
+
+static void
+icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+	const struct drm_color_lut *lut = blob->data;
+	const struct drm_color_lut *entry;
+	enum pipe pipe = crtc->pipe;
+	u32 i;
+
+	/*
+	 * Program the fine and coarse segments and GCMAX from the gamma LUT.
+	 * First the fine segment (let's call it seg2)...
+	 *
+	 * The fine segment's step is 1/(128 * 256), i.e. its entries are
+	 * 1/(128 * 256), 2/(128 * 256) ... 256/(128 * 256). So we need to pick
+	 * every 8th entry in the LUT, and program 256 of those.
+	 *
+	 * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
+	 * with seg2[0] being unused by the hardware.
+	 */
+	I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+	for (i = 1; i < 257; i++) {
+		entry = &lut[i * 8];
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+	}
+
+	/*
+	 * Program Coarse segment (let's call it seg3)...
+	 *
+	 * The coarse segment starts from index 0 and its step is 1/256, i.e.
+	 * 0, 1/256, 2/256 ... 256/256. As per the description of each entry
+	 * in the LUT above, we need to pick every (8 * 128)th entry in the
+	 * LUT, and program 256 of those.
+	 *
+	 * Spec is not very clear about if entries seg3[0] and seg3[1] are
+	 * being used or not, but we still need to program these to advance
+	 * the index.
+	 */
+	for (i = 0; i < 256; i++) {
+		entry = &lut[i * 8 * 128];
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+	}
+
+	/* The last entry in the LUT, lut[256 * 8 * 128], goes into GCMAX */
+	entry = &lut[256 * 8 * 128];
+	icl_load_gcmax(crtc_state, entry);
+	ivb_load_lut_ext_max(crtc);
+}
+
 static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
@@ -775,10 +887,17 @@  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 	if (crtc_state->base.degamma_lut)
 		glk_load_degamma_lut(crtc_state);
 
-	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
-	    GAMMA_MODE_MODE_8BIT) {
+	switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
+	case GAMMA_MODE_MODE_8BIT:
 		i9xx_load_luts(crtc_state);
-	} else {
+		break;
+
+	case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
+		icl_program_gamma_superfine_segment(crtc_state);
+		icl_program_gamma_multi_segment(crtc_state);
+		break;
+
+	default:
 		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
 		ivb_load_lut_ext_max(crtc);
 	}
@@ -1209,7 +1328,7 @@  static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
 	    crtc_state_is_legacy_gamma(crtc_state))
 		gamma_mode |= GAMMA_MODE_MODE_8BIT;
 	else
-		gamma_mode |= GAMMA_MODE_MODE_10BIT;
+		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
 
 	return gamma_mode;
 }