diff mbox

[4/5] drm/i915: Implement color management on bdw/skl/bxt/kbl

Message ID 1456150691-20231-5-git-send-email-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin Feb. 22, 2016, 2:18 p.m. UTC
Patch based on a previous series by Shashank Sharma.

v2: Do not read GAMMA_MODE register to figure what mode we're in

v3: Program PREC_PAL_GC_MAX to clamp pixel values > 1.0

    Add documentation on how the Broadcast RGB property is affected by CTM

v4: Update contributors

v5: Refactor degamma/gamma LUTs load into a single function

v6: Fix missing intel_crtc variable (bisect issue)

Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Kumar, Kiran S <kiran.s.kumar@intel.com>
Signed-off-by: Kausal Malladi <kausalmalladi@gmail.com>
---
 Documentation/DocBook/gpu.tmpl       |   6 +-
 drivers/gpu/drm/i915/i915_drv.c      |  24 ++-
 drivers/gpu/drm/i915/i915_drv.h      |   6 +
 drivers/gpu/drm/i915/i915_reg.h      |  22 +++
 drivers/gpu/drm/i915/intel_color.c   | 356 ++++++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/intel_display.c |  22 ++-
 drivers/gpu/drm/i915/intel_drv.h     |   3 +-
 drivers/gpu/drm/i915/intel_fbdev.c   |   8 +
 8 files changed, 383 insertions(+), 64 deletions(-)

Comments

Matt Roper Feb. 23, 2016, 12:52 a.m. UTC | #1
On Mon, Feb 22, 2016 at 02:18:10PM +0000, Lionel Landwerlin wrote:
> Patch based on a previous series by Shashank Sharma.
> 
> v2: Do not read GAMMA_MODE register to figure what mode we're in
> 
> v3: Program PREC_PAL_GC_MAX to clamp pixel values > 1.0
> 
>     Add documentation on how the Broadcast RGB property is affected by CTM
> 
> v4: Update contributors
> 
> v5: Refactor degamma/gamma LUTs load into a single function
> 
> v6: Fix missing intel_crtc variable (bisect issue)

Not sure if you saw me feedback on v4 or not:
   https://lists.freedesktop.org/archives/intel-gfx/2016-February/088043.html

It looks like most of those comments still apply here.  If you disagree
with my comments, that's fine too, I just wanted to make sure it didn't
get overlooked.  :-)


Matt

> 
> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Kumar, Kiran S <kiran.s.kumar@intel.com>
> Signed-off-by: Kausal Malladi <kausalmalladi@gmail.com>
> ---
>  Documentation/DocBook/gpu.tmpl       |   6 +-
>  drivers/gpu/drm/i915/i915_drv.c      |  24 ++-
>  drivers/gpu/drm/i915/i915_drv.h      |   6 +
>  drivers/gpu/drm/i915/i915_reg.h      |  22 +++
>  drivers/gpu/drm/i915/intel_color.c   | 356 ++++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/intel_display.c |  22 ++-
>  drivers/gpu/drm/i915/intel_drv.h     |   3 +-
>  drivers/gpu/drm/i915/intel_fbdev.c   |   8 +
>  8 files changed, 383 insertions(+), 64 deletions(-)
> 
> diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl
> index 1692c4d..430e99b 100644
> --- a/Documentation/DocBook/gpu.tmpl
> +++ b/Documentation/DocBook/gpu.tmpl
> @@ -2153,7 +2153,11 @@ void intel_crt_init(struct drm_device *dev)
>  	<td valign="top" >ENUM</td>
>  	<td valign="top" >{ "Automatic", "Full", "Limited 16:235" }</td>
>  	<td valign="top" >Connector</td>
> -	<td valign="top" >TBD</td>
> +	<td valign="top" >When this property is set to Limited 16:235
> +		and CTM is set, the hardware will be programmed with the
> +		result of the multiplication of CTM by the limited range
> +		matrix to ensure the pixels normaly in the range 0..1.0 are
> +		remapped to the range 16/255..235/255.</td>
>  	</tr>
>  	<tr>
>  	<td valign="top" >“audio”</td>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 20e8200..3807b73 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -66,6 +66,9 @@ static struct drm_driver driver;
>  #define IVB_CURSOR_OFFSETS \
>  	.cursor_offsets = { CURSOR_A_OFFSET, IVB_CURSOR_B_OFFSET, IVB_CURSOR_C_OFFSET }
>  
> +#define BDW_COLORS \
> +	.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }
> +
>  static const struct intel_device_info intel_i830_info = {
>  	.gen = 2, .is_mobile = 1, .cursor_needs_physical = 1, .num_pipes = 2,
>  	.has_overlay = 1, .overlay_needs_physical = 1,
> @@ -288,24 +291,28 @@ static const struct intel_device_info intel_haswell_m_info = {
>  	.is_mobile = 1,
>  };
>  
> +#define BDW_FEATURES \
> +	HSW_FEATURES, \
> +	BDW_COLORS
> +
>  static const struct intel_device_info intel_broadwell_d_info = {
> -	HSW_FEATURES,
> +	BDW_FEATURES,
>  	.gen = 8,
>  };
>  
>  static const struct intel_device_info intel_broadwell_m_info = {
> -	HSW_FEATURES,
> +	BDW_FEATURES,
>  	.gen = 8, .is_mobile = 1,
>  };
>  
>  static const struct intel_device_info intel_broadwell_gt3d_info = {
> -	HSW_FEATURES,
> +	BDW_FEATURES,
>  	.gen = 8,
>  	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
>  };
>  
>  static const struct intel_device_info intel_broadwell_gt3m_info = {
> -	HSW_FEATURES,
> +	BDW_FEATURES,
>  	.gen = 8, .is_mobile = 1,
>  	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
>  };
> @@ -321,13 +328,13 @@ static const struct intel_device_info intel_cherryview_info = {
>  };
>  
>  static const struct intel_device_info intel_skylake_info = {
> -	HSW_FEATURES,
> +	BDW_FEATURES,
>  	.is_skylake = 1,
>  	.gen = 9,
>  };
>  
>  static const struct intel_device_info intel_skylake_gt3_info = {
> -	HSW_FEATURES,
> +	BDW_FEATURES,
>  	.is_skylake = 1,
>  	.gen = 9,
>  	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
> @@ -345,17 +352,18 @@ static const struct intel_device_info intel_broxton_info = {
>  	.has_fbc = 1,
>  	GEN_DEFAULT_PIPEOFFSETS,
>  	IVB_CURSOR_OFFSETS,
> +	BDW_COLORS,
>  };
>  
>  static const struct intel_device_info intel_kabylake_info = {
> -	HSW_FEATURES,
> +	BDW_FEATURES,
>  	.is_preliminary = 1,
>  	.is_kabylake = 1,
>  	.gen = 9,
>  };
>  
>  static const struct intel_device_info intel_kabylake_gt3_info = {
> -	HSW_FEATURES,
> +	BDW_FEATURES,
>  	.is_preliminary = 1,
>  	.is_kabylake = 1,
>  	.gen = 9,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6634c09..7627a4e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -660,6 +660,7 @@ struct drm_i915_display_funcs {
>  	/* display clock increase/decrease */
>  	/* pll clock increase/decrease */
>  
> +	void (*load_csc_matrix)(struct drm_crtc *crtc);
>  	void (*load_luts)(struct drm_crtc *crtc);
>  };
>  
> @@ -808,6 +809,11 @@ struct intel_device_info {
>  	u8 has_slice_pg:1;
>  	u8 has_subslice_pg:1;
>  	u8 has_eu_pg:1;
> +
> +	struct color_luts {
> +		u16 degamma_lut_size;
> +		u16 gamma_lut_size;
> +	} color;
>  };
>  
>  #undef DEFINE_FLAG
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 3774870..8ce76d7 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7650,6 +7650,28 @@ enum skl_disp_power_wells {
>  #define PIPE_CSC_POSTOFF_ME(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_ME, _PIPE_B_CSC_POSTOFF_ME)
>  #define PIPE_CSC_POSTOFF_LO(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_LO, _PIPE_B_CSC_POSTOFF_LO)
>  
> +/* pipe degamma/gamma LUTs on IVB+ */
> +#define _PAL_PREC_INDEX_A	0x4A400
> +#define _PAL_PREC_INDEX_B	0x4AC00
> +#define _PAL_PREC_INDEX_C	0x4B400
> +#define   PAL_PREC_10_12_BIT		(0 << 31)
> +#define   PAL_PREC_SPLIT_MODE		(1 << 31)
> +#define   PAL_PREC_AUTO_INCREMENT	(1 << 15)
> +#define _PAL_PREC_DATA_A	0x4A404
> +#define _PAL_PREC_DATA_B	0x4AC04
> +#define _PAL_PREC_DATA_C	0x4B404
> +#define _PAL_PREC_GC_MAX_A	0x4A410
> +#define _PAL_PREC_GC_MAX_B	0x4AC10
> +#define _PAL_PREC_GC_MAX_C	0x4B410
> +#define _PAL_PREC_EXT_GC_MAX_A	0x4A420
> +#define _PAL_PREC_EXT_GC_MAX_B	0x4AC20
> +#define _PAL_PREC_EXT_GC_MAX_C	0x4B420
> +
> +#define PREC_PAL_INDEX(pipe)		_MMIO_PIPE(pipe, _PAL_PREC_INDEX_A, _PAL_PREC_INDEX_B)
> +#define PREC_PAL_DATA(pipe)		_MMIO_PIPE(pipe, _PAL_PREC_DATA_A, _PAL_PREC_DATA_B)
> +#define PREC_PAL_GC_MAX(pipe, i)	_MMIO(_PIPE(pipe, _PAL_PREC_GC_MAX_A, _PAL_PREC_GC_MAX_B) + (i) * 4)
> +#define PREC_PAL_EXT_GC_MAX(pipe, i)	_MMIO(_PIPE(pipe, _PAL_PREC_EXT_GC_MAX_A, _PAL_PREC_EXT_GC_MAX_B) + (i) * 4)
> +
>  /* MIPI DSI registers */
>  
>  #define _MIPI_PORT(port, a, c)	_PORT3(port, a, 0, c)	/* ports A and C only */
> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
> index ba27ce2..a6c7d22 100644
> --- a/drivers/gpu/drm/i915/intel_color.c
> +++ b/drivers/gpu/drm/i915/intel_color.c
> @@ -24,44 +24,172 @@
>  
>  #include "intel_drv.h"
>  
> +#define CTM_COEFF_SIGN	(1ULL << 63)
> +
> +#define CTM_COEFF_1_0	(1ULL << 32)
> +#define CTM_COEFF_2_0	(CTM_COEFF_1_0 << 1)
> +#define CTM_COEFF_4_0	(CTM_COEFF_2_0 << 1)
> +#define CTM_COEFF_0_5	(CTM_COEFF_1_0 >> 1)
> +#define CTM_COEFF_0_25	(CTM_COEFF_0_5 >> 1)
> +#define CTM_COEFF_0_125	(CTM_COEFF_0_25 >> 1)
> +
> +#define CTM_COEFF_LIMITED_RANGE ((235ULL - 16ULL) * CTM_COEFF_1_0 / 255)
> +
> +#define CTM_COEFF_NEGATIVE(coeff)	(((coeff) & CTM_COEFF_SIGN) != 0)
> +#define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
> +
> +#define LEGACY_LUT_LENGTH		(sizeof(struct drm_color_lut) * 256)
> +
>  /*
> - * Set up the pipe CSC unit.
> + * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
> + * format). This macro takes the coefficient we want transformed and the
> + * number of fractional bits.
>   *
> - * Currently only full range RGB to limited range RGB conversion
> - * is supported, but eventually this should handle various
> - * RGB<->YCbCr scenarios as well.
> + * We only have a 9 bits precision window which slides depending on the value
> + * of the CTM coefficient and we write the value from bit 3. We also round the
> + * value.
>   */
> +#define I9XX_CSC_COEFF_FP(coeff, fbits)	\
> +	(clamp_val(((coeff) >> (32 - (fbits) - 3)) + 4, 0, 0xfff) & 0xff8)
> +
> +#define I9XX_CSC_COEFF_LIMITED_RANGE	\
> +	I9XX_CSC_COEFF_FP(CTM_COEFF_LIMITED_RANGE, 9)
> +#define I9XX_CSC_COEFF_1_0		\
> +	((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8))
> +
> +static bool crtc_state_is_legacy(struct drm_crtc_state *state)
> +{
> +	return !state->degamma_lut &&
> +		!state->ctm &&
> +		state->gamma_lut &&
> +		state->gamma_lut->length == LEGACY_LUT_LENGTH;
> +}
> +
> +/*
> + * When using limited range, multiply the matrix given by userspace by
> + * the matrix that we would use for the limited range. We do the
> + * multiplication in U2.30 format.
> + */
> +static void ctm_mult_by_limited(uint64_t *result, int64_t *input)
> +{
> +	int i, j;
> +	uint64_t limited_coeffs[9] = { CTM_COEFF_LIMITED_RANGE, 0, 0,
> +				       0, CTM_COEFF_LIMITED_RANGE, 0,
> +				       0, 0, CTM_COEFF_LIMITED_RANGE };
> +
> +	for (i = 0; i < ARRAY_SIZE(limited_coeffs); i++) {
> +		int column = i % 3, row = i / 3;
> +		int negative = 0;
> +
> +		input[i] = 0;





> +		for (j = 0; j < 3; j++) {
> +			int64_t user_coeff = input[j * 3 + column];
> +			uint64_t limited_coeff =
> +				limited_coeffs[row * 3 + j] >> 2;
> +			uint64_t abs_coeff =
> +				clamp_val(CTM_COEFF_ABS(user_coeff),
> +					  0,
> +					  CTM_COEFF_4_0 - 1) >> 2;
> +
> +			if (CTM_COEFF_NEGATIVE(user_coeff))
> +				negative = !negative;
> +			result[i] += limited_coeff * abs_coeff;
> +		}
> +
> +		result[i] >>= 27;
> +		if (negative)
> +			result[i] |= CTM_COEFF_SIGN;
> +	}
> +}
> +
> +/* Set up the pipe CSC unit. */
>  static void i9xx_load_csc_matrix(struct drm_crtc *crtc)
>  {
>  	struct drm_device *dev = crtc->dev;
> +	struct drm_crtc_state *crtc_state = crtc->state;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -	int pipe = intel_crtc->pipe;
> -	uint16_t coeff = 0x7800; /* 1.0 */
> +	int i, pipe = intel_crtc->pipe;
> +	uint16_t coeffs[9] = { 0, };
>  
> -	/*
> -	 * TODO: Check what kind of values actually come out of the pipe
> -	 * with these coeff/postoff values and adjust to get the best
> -	 * accuracy. Perhaps we even need to take the bpc value into
> -	 * consideration.
> -	 */
> +	if (crtc_state->ctm) {
> +		struct drm_color_ctm *ctm =
> +			(struct drm_color_ctm *)crtc_state->ctm->data;
> +		uint64_t input[9] = { 0, };
>  
> -	if (intel_crtc->config->limited_color_range)
> -		coeff = ((235 - 16) * (1 << 12) / 255) & 0xff8; /* 0.xxx... */
> +		if (intel_crtc->config->limited_color_range)
> +			ctm_mult_by_limited(input, ctm->matrix);
> +		else {
> +			for (i = 0; i < ARRAY_SIZE(input); i++)
> +				input[i] = ctm->matrix[i];
> +		}
> +
> +		/*
> +		 * Convert fixed point S31.32 input to format supported by the
> +		 * hardware.
> +		 */
> +		for (i = 0; i < ARRAY_SIZE(coeffs); i++) {
> +			uint64_t abs_coeff = ((1ULL << 63) - 1) & input[i];
> +
> +			/*
> +			 * Clamp input value to min/max supported by
> +			 * hardware.
> +			 */
> +			abs_coeff = clamp_val(abs_coeff, 0, CTM_COEFF_4_0 - 1);
> +
> +			/* sign bit */
> +			if (CTM_COEFF_NEGATIVE(input[i]))
> +				coeffs[i] |= 1 << 15;
> +
> +			if (abs_coeff < CTM_COEFF_0_125)
> +				coeffs[i] |= (3 << 12) |
> +					I9XX_CSC_COEFF_FP(abs_coeff, 12);
> +			else if (abs_coeff < CTM_COEFF_0_25)
> +				coeffs[i] |= (2 << 12) |
> +					I9XX_CSC_COEFF_FP(abs_coeff, 11);
> +			else if (abs_coeff < CTM_COEFF_0_5)
> +				coeffs[i] |= (1 << 12) |
> +					I9XX_CSC_COEFF_FP(abs_coeff, 10);
> +			else if (abs_coeff < CTM_COEFF_1_0)
> +				coeffs[i] |= I9XX_CSC_COEFF_FP(abs_coeff, 9);
> +			else if (abs_coeff < CTM_COEFF_2_0)
> +				coeffs[i] |= (7 << 12) |
> +					I9XX_CSC_COEFF_FP(abs_coeff, 8);
> +			else
> +				coeffs[i] |= (6 << 12) |
> +					I9XX_CSC_COEFF_FP(abs_coeff, 7);
> +		}
> +	} else {
> +		/*
> +		 * Load an identify matrix if no coefficients are provided.
> +		 *
> +		 * TODO: Check what kind of values actually come out of the
> +		 * pipe with these coeff/postoff values and adjust to get the
> +		 * best accuracy. Perhaps we even need to take the bpc value
> +		 * into consideration.
> +		 */
> +		for (i = 0; i < 3; i++) {
> +			if (intel_crtc->config->limited_color_range)
> +				coeffs[i * 3 + i] =
> +					I9XX_CSC_COEFF_LIMITED_RANGE;
> +			else
> +				coeffs[i * 3 + i] = I9XX_CSC_COEFF_1_0;
> +		}
> +	}
>  
>  	/*
>  	 * GY/GU and RY/RU should be the other way around according
>  	 * to BSpec, but reality doesn't agree. Just set them up in
>  	 * a way that results in the correct picture.
>  	 */
> -	I915_WRITE(PIPE_CSC_COEFF_RY_GY(pipe), coeff << 16);
> -	I915_WRITE(PIPE_CSC_COEFF_BY(pipe), 0);
> +	I915_WRITE(PIPE_CSC_COEFF_RY_GY(pipe), coeffs[0] << 16 | coeffs[1]);
> +	I915_WRITE(PIPE_CSC_COEFF_BY(pipe), coeffs[2] << 16);
>  
> -	I915_WRITE(PIPE_CSC_COEFF_RU_GU(pipe), coeff);
> -	I915_WRITE(PIPE_CSC_COEFF_BU(pipe), 0);
> +	I915_WRITE(PIPE_CSC_COEFF_RU_GU(pipe), coeffs[3] << 16 | coeffs[4]);
> +	I915_WRITE(PIPE_CSC_COEFF_BU(pipe), coeffs[5] << 16);
>  
> -	I915_WRITE(PIPE_CSC_COEFF_RV_GV(pipe), 0);
> -	I915_WRITE(PIPE_CSC_COEFF_BV(pipe), coeff << 16);
> +	I915_WRITE(PIPE_CSC_COEFF_RV_GV(pipe), coeffs[6] << 16 | coeffs[7]);
> +	I915_WRITE(PIPE_CSC_COEFF_BV(pipe), coeffs[8] << 16);
>  
>  	I915_WRITE(PIPE_CSC_PREOFF_HI(pipe), 0);
>  	I915_WRITE(PIPE_CSC_PREOFF_ME(pipe), 0);
> @@ -90,13 +218,18 @@ static void i9xx_load_csc_matrix(struct drm_crtc *crtc)
>  
>  void intel_color_set_csc(struct drm_crtc *crtc)
>  {
> -	i9xx_load_csc_matrix(crtc);
> +	struct drm_device *dev = crtc->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	if (dev_priv->display.load_csc_matrix)
> +		dev_priv->display.load_csc_matrix(crtc);
>  }
>  
> -/* Loads the palette/gamma unit for the CRTC with the prepared values. */
> +/* Loads the legacy palette/gamma unit for the CRTC. */
>  static void i9xx_load_luts(struct drm_crtc *crtc)
>  {
>  	struct drm_device *dev = crtc->dev;
> +	struct drm_crtc_state *state = crtc->state;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	enum pipe pipe = intel_crtc->pipe;
> @@ -109,18 +242,33 @@ static void i9xx_load_luts(struct drm_crtc *crtc)
>  			assert_pll_enabled(dev_priv, pipe);
>  	}
>  
> -	for (i = 0; i < 256; i++) {
> -		uint32_t word = (intel_crtc->lut_r[i] << 16) |
> -			(intel_crtc->lut_g[i] << 8) |
> -			intel_crtc->lut_b[i];
> -		if (HAS_GMCH_DISPLAY(dev))
> -			I915_WRITE(PALETTE(pipe, i), word);
> -		else
> -			I915_WRITE(LGC_PALETTE(pipe, i), word);
> +	if (state->gamma_lut) {
> +		struct drm_color_lut *lut =
> +			(struct drm_color_lut *) state->gamma_lut->data;
> +		for (i = 0; i < 256; i++) {
> +			uint32_t word =
> +				(drm_color_lut_extract(lut[i].red, 8) << 16) |
> +				(drm_color_lut_extract(lut[i].green, 8) << 8) |
> +				drm_color_lut_extract(lut[i].blue, 8);
> +
> +			if (HAS_GMCH_DISPLAY(dev))
> +				I915_WRITE(PALETTE(pipe, i), word);
> +			else
> +				I915_WRITE(LGC_PALETTE(pipe, i), word);
> +		}
> +	} else {
> +		for (i = 0; i < 256; i++) {
> +			uint32_t word = (i << 16) | (i << 8) | i;
> +
> +			if (HAS_GMCH_DISPLAY(dev))
> +				I915_WRITE(PALETTE(pipe, i), word);
> +			else
> +				I915_WRITE(LGC_PALETTE(pipe, i), word);
> +		}
>  	}
>  }
>  
> -/* Loads the legacy palette/gamma unit for the CRTC on Haswell+. */
> +/* Loads the legacy palette/gamma unit for the CRTC on Haswell. */
>  static void haswell_load_luts(struct drm_crtc *crtc)
>  {
>  	struct drm_device *dev = crtc->dev;
> @@ -149,6 +297,89 @@ static void haswell_load_luts(struct drm_crtc *crtc)
>  		hsw_enable_ips(intel_crtc);
>  }
>  
> +/* Loads the palette/gamma unit for the CRTC on Broadwell+. */
> +static void broadwell_load_luts(struct drm_crtc *crtc)
> +{
> +	struct drm_device *dev = crtc->dev;
> +	struct drm_crtc_state *state = crtc->state;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_crtc_state *intel_state = to_intel_crtc_state(state);
> +	enum pipe pipe = to_intel_crtc(crtc)->pipe;
> +	uint32_t i, lut_size = INTEL_INFO(dev)->color.degamma_lut_size;
> +
> +	if (crtc_state_is_legacy(state)) {
> +		haswell_load_luts(crtc);
> +		return;
> +	}
> +
> +	I915_WRITE(PREC_PAL_INDEX(pipe),
> +		   PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT);
> +
> +	if (state->degamma_lut) {
> +		struct drm_color_lut *lut =
> +			(struct drm_color_lut *) state->degamma_lut->data;
> +
> +		for (i = 0; i < lut_size; i++) {
> +			uint32_t word =
> +			drm_color_lut_extract(lut[i].red, 10) << 20 |
> +			drm_color_lut_extract(lut[i].green, 10) << 10 |
> +			drm_color_lut_extract(lut[i].blue, 10);
> +
> +			I915_WRITE(PREC_PAL_DATA(pipe), word);
> +		}
> +	} else {
> +		for (i = 0; i < lut_size; i++) {
> +			uint32_t v = (i * ((1 << 10) - 1)) / (lut_size - 1);
> +
> +			I915_WRITE(PREC_PAL_DATA(pipe),
> +				   (v << 20) | (v << 10) | v);
> +		}
> +	}
> +
> +	if (state->gamma_lut) {
> +		struct drm_color_lut *lut =
> +			(struct drm_color_lut *) state->gamma_lut->data;
> +
> +		for (i = 0; i < lut_size; i++) {
> +			uint32_t word =
> +			(drm_color_lut_extract(lut[i].red, 10) << 20) |
> +			(drm_color_lut_extract(lut[i].green, 10) << 10) |
> +			drm_color_lut_extract(lut[i].blue, 10);
> +
> +			I915_WRITE(PREC_PAL_DATA(pipe), word);
> +		}
> +
> +		/* Program the max register to clamp values > 1.0. */
> +		I915_WRITE(PREC_PAL_GC_MAX(pipe, 0),
> +			   drm_color_lut_extract(lut[i].red, 16));
> +		I915_WRITE(PREC_PAL_GC_MAX(pipe, 1),
> +			   drm_color_lut_extract(lut[i].green, 16));
> +		I915_WRITE(PREC_PAL_GC_MAX(pipe, 2),
> +			   drm_color_lut_extract(lut[i].blue, 16));
> +	} else {
> +		for (i = 0; i < lut_size; i++) {
> +			uint32_t v = (i * ((1 << 10) - 1)) / (lut_size - 1);
> +
> +			I915_WRITE(PREC_PAL_DATA(pipe),
> +				   (v << 20) | (v << 10) | v);
> +		}
> +
> +		I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16) - 1);
> +		I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1);
> +		I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1);
> +	}
> +
> +	intel_state->gamma_mode = GAMMA_MODE_MODE_SPLIT;
> +	I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT);
> +	POSTING_READ(GAMMA_MODE(pipe));
> +
> +	/*
> +	 * Reset the index, otherwise it prevents the legacy palette to be
> +	 * written properly.
> +	 */
> +	I915_WRITE(PREC_PAL_INDEX(pipe), 0);
> +}
> +
>  void intel_color_load_luts(struct drm_crtc *crtc)
>  {
>  	struct drm_device *dev = crtc->dev;
> @@ -161,38 +392,61 @@ void intel_color_load_luts(struct drm_crtc *crtc)
>  	dev_priv->display.load_luts(crtc);
>  }
>  
> -void intel_color_legacy_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
> -				  u16 *blue, uint32_t start, uint32_t size)
> +int intel_color_check(struct drm_crtc *crtc,
> +		      struct drm_crtc_state *crtc_state)
>  {
> -	int end = (start + size > 256) ? 256 : start + size, i;
> -	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	struct drm_device *dev = crtc->dev;
> +	size_t gamma_length, degamma_length;
>  
> -	for (i = start; i < end; i++) {
> -		intel_crtc->lut_r[i] = red[i] >> 8;
> -		intel_crtc->lut_g[i] = green[i] >> 8;
> -		intel_crtc->lut_b[i] = blue[i] >> 8;
> -	}
> +	degamma_length = INTEL_INFO(dev)->color.degamma_lut_size *
> +		sizeof(struct drm_color_lut);
> +	gamma_length = INTEL_INFO(dev)->color.gamma_lut_size *
> +		sizeof(struct drm_color_lut);
> +
> +	/*
> +	 * We allow both degamma & gamma luts at the right size or
> +	 * NULL.
> +	 */
> +	if ((!crtc_state->degamma_lut ||
> +	     crtc_state->degamma_lut->length == degamma_length) &&
> +	    (!crtc_state->gamma_lut ||
> +	     crtc_state->gamma_lut->length == gamma_length))
> +		return 0;
> +
> +	/*
> +	 * We also allow no degamma lut and a gamma lut at the legacy
> +	 * size (256 entries).
> +	 */
> +	if (!crtc_state->degamma_lut &&
> +	    crtc_state->gamma_lut &&
> +	    crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH)
> +		return 0;
>  
> -	intel_color_load_luts(crtc);
> +	return -EINVAL;
>  }
>  
>  void intel_color_init(struct drm_crtc *crtc)
>  {
>  	struct drm_device *dev = crtc->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -	int i;
>  
>  	drm_mode_crtc_set_gamma_size(crtc, 256);
> -	for (i = 0; i < 256; i++) {
> -		intel_crtc->lut_r[i] = i;
> -		intel_crtc->lut_g[i] = i;
> -		intel_crtc->lut_b[i] = i;
> -	}
>  
> -	if (IS_HASWELL(dev) ||
> -	    (INTEL_INFO(dev)->gen >= 8 && !IS_CHERRYVIEW(dev)))
> +	if (IS_HASWELL(dev)) {
> +		dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix;
>  		dev_priv->display.load_luts = haswell_load_luts;
> -	else
> +	} else if (IS_BROADWELL(dev) || IS_SKYLAKE(dev) ||
> +		   IS_BROXTON(dev) || IS_KABYLAKE(dev)) {
> +		dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix;
> +		dev_priv->display.load_luts = broadwell_load_luts;
> +	} else {
>  		dev_priv->display.load_luts = i9xx_load_luts;
> +	}
> +
> +	/* Enable color management support when we have degamma & gamma LUTs. */
> +	if (INTEL_INFO(dev)->color.degamma_lut_size != 0 &&
> +	    INTEL_INFO(dev)->color.gamma_lut_size != 0)
> +		drm_helper_crtc_enable_color_mgmt(crtc,
> +					INTEL_INFO(dev)->color.degamma_lut_size,
> +					INTEL_INFO(dev)->color.gamma_lut_size);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index acbb1d9..1aafe00 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11846,6 +11846,12 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
>  			return ret;
>  	}
>  
> +	if (crtc_state->color_mgmt_changed) {
> +		ret = intel_color_check(crtc, crtc_state);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	ret = 0;
>  	if (dev_priv->display.compute_pipe_wm) {
>  		ret = dev_priv->display.compute_pipe_wm(intel_crtc, state);
> @@ -11867,7 +11873,6 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
>  
>  static const struct drm_crtc_helper_funcs intel_helper_funcs = {
>  	.mode_set_base_atomic = intel_pipe_set_base_atomic,
> -	.load_lut = intel_color_load_luts,
>  	.atomic_begin = intel_begin_crtc_commit,
>  	.atomic_flush = intel_finish_crtc_commit,
>  	.atomic_check = intel_crtc_atomic_check,
> @@ -13398,6 +13403,18 @@ static int intel_atomic_commit(struct drm_device *dev,
>  			hw_check = true;
>  		}
>  
> +		if (!modeset &&
> +		    crtc->state->active &&
> +		    crtc->state->color_mgmt_changed) {
> +			/*
> +			 * Only update color management when not doing
> +			 * a modeset as this will be done by
> +			 * crtc_enable already.
> +			 */
> +			intel_color_set_csc(crtc);
> +			intel_color_load_luts(crtc);
> +		}
> +
>  		if (!modeset)
>  			intel_pre_plane_update(to_intel_crtc_state(crtc_state));
>  
> @@ -13487,8 +13504,9 @@ out:
>  #undef for_each_intel_crtc_masked
>  
>  static const struct drm_crtc_funcs intel_crtc_funcs = {
> -	.gamma_set = intel_color_legacy_gamma_set,
> +	.gamma_set = drm_atomic_helper_legacy_gamma_set,
>  	.set_config = drm_atomic_helper_set_config,
> +	.set_property = drm_atomic_helper_crtc_set_property,
>  	.destroy = intel_crtc_destroy,
>  	.page_flip = intel_crtc_page_flip,
>  	.atomic_duplicate_state = intel_crtc_duplicate_state,
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 9742d5b..3082dbe 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1624,9 +1624,8 @@ extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>  
>  /* intel_color.c */
>  void intel_color_init(struct drm_crtc *crtc);
> +int intel_color_check(struct drm_crtc *crtc, struct drm_crtc_state *state);
>  void intel_color_set_csc(struct drm_crtc *crtc);
>  void intel_color_load_luts(struct drm_crtc *crtc);
> -void intel_color_legacy_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
> -				  u16 *blue, uint32_t start, uint32_t size);
>  
>  #endif /* __INTEL_DRV_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index 97a91e6..777f98c 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -379,6 +379,7 @@ retry:
>  		struct drm_connector *connector;
>  		struct drm_encoder *encoder;
>  		struct drm_fb_helper_crtc *new_crtc;
> +		struct intel_crtc *intel_crtc;
>  
>  		fb_conn = fb_helper->connector_info[i];
>  		connector = fb_conn->connector;
> @@ -420,6 +421,13 @@ retry:
>  
>  		num_connectors_enabled++;
>  
> +		intel_crtc = to_intel_crtc(connector->state->crtc);
> +		for (j = 0; j < 256; j++) {
> +			intel_crtc->lut_r[j] = j;
> +			intel_crtc->lut_g[j] = j;
> +			intel_crtc->lut_b[j] = j;
> +		}
> +
>  		new_crtc = intel_fb_helper_crtc(fb_helper, connector->state->crtc);
>  
>  		/*
> -- 
> 2.7.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Lionel Landwerlin Feb. 23, 2016, 2:42 p.m. UTC | #2
On 23/02/16 00:52, Matt Roper wrote:
> On Mon, Feb 22, 2016 at 02:18:10PM +0000, Lionel Landwerlin wrote:
>> Patch based on a previous series by Shashank Sharma.
>>
>> v2: Do not read GAMMA_MODE register to figure what mode we're in
>>
>> v3: Program PREC_PAL_GC_MAX to clamp pixel values > 1.0
>>
>>      Add documentation on how the Broadcast RGB property is affected by CTM
>>
>> v4: Update contributors
>>
>> v5: Refactor degamma/gamma LUTs load into a single function
>>
>> v6: Fix missing intel_crtc variable (bisect issue)
> Not sure if you saw me feedback on v4 or not:
>     https://lists.freedesktop.org/archives/intel-gfx/2016-February/088043.html
>
> It looks like most of those comments still apply here.  If you disagree
> with my comments, that's fine too, I just wanted to make sure it didn't
> get overlooked.  :-)
>
>
> Matt

Sorry for that, completely missed on the first 3/4 comments.
You're right the limited range multiplication was wrong (input[i] vs 
result[i]...).
Fixed that and simplified it as you suggested.

Thanks a lot!

-
Lionel
diff mbox

Patch

diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl
index 1692c4d..430e99b 100644
--- a/Documentation/DocBook/gpu.tmpl
+++ b/Documentation/DocBook/gpu.tmpl
@@ -2153,7 +2153,11 @@  void intel_crt_init(struct drm_device *dev)
 	<td valign="top" >ENUM</td>
 	<td valign="top" >{ "Automatic", "Full", "Limited 16:235" }</td>
 	<td valign="top" >Connector</td>
-	<td valign="top" >TBD</td>
+	<td valign="top" >When this property is set to Limited 16:235
+		and CTM is set, the hardware will be programmed with the
+		result of the multiplication of CTM by the limited range
+		matrix to ensure the pixels normaly in the range 0..1.0 are
+		remapped to the range 16/255..235/255.</td>
 	</tr>
 	<tr>
 	<td valign="top" >“audio”</td>
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 20e8200..3807b73 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -66,6 +66,9 @@  static struct drm_driver driver;
 #define IVB_CURSOR_OFFSETS \
 	.cursor_offsets = { CURSOR_A_OFFSET, IVB_CURSOR_B_OFFSET, IVB_CURSOR_C_OFFSET }
 
+#define BDW_COLORS \
+	.color = { .degamma_lut_size = 512, .gamma_lut_size = 512 }
+
 static const struct intel_device_info intel_i830_info = {
 	.gen = 2, .is_mobile = 1, .cursor_needs_physical = 1, .num_pipes = 2,
 	.has_overlay = 1, .overlay_needs_physical = 1,
@@ -288,24 +291,28 @@  static const struct intel_device_info intel_haswell_m_info = {
 	.is_mobile = 1,
 };
 
+#define BDW_FEATURES \
+	HSW_FEATURES, \
+	BDW_COLORS
+
 static const struct intel_device_info intel_broadwell_d_info = {
-	HSW_FEATURES,
+	BDW_FEATURES,
 	.gen = 8,
 };
 
 static const struct intel_device_info intel_broadwell_m_info = {
-	HSW_FEATURES,
+	BDW_FEATURES,
 	.gen = 8, .is_mobile = 1,
 };
 
 static const struct intel_device_info intel_broadwell_gt3d_info = {
-	HSW_FEATURES,
+	BDW_FEATURES,
 	.gen = 8,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
 };
 
 static const struct intel_device_info intel_broadwell_gt3m_info = {
-	HSW_FEATURES,
+	BDW_FEATURES,
 	.gen = 8, .is_mobile = 1,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
 };
@@ -321,13 +328,13 @@  static const struct intel_device_info intel_cherryview_info = {
 };
 
 static const struct intel_device_info intel_skylake_info = {
-	HSW_FEATURES,
+	BDW_FEATURES,
 	.is_skylake = 1,
 	.gen = 9,
 };
 
 static const struct intel_device_info intel_skylake_gt3_info = {
-	HSW_FEATURES,
+	BDW_FEATURES,
 	.is_skylake = 1,
 	.gen = 9,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
@@ -345,17 +352,18 @@  static const struct intel_device_info intel_broxton_info = {
 	.has_fbc = 1,
 	GEN_DEFAULT_PIPEOFFSETS,
 	IVB_CURSOR_OFFSETS,
+	BDW_COLORS,
 };
 
 static const struct intel_device_info intel_kabylake_info = {
-	HSW_FEATURES,
+	BDW_FEATURES,
 	.is_preliminary = 1,
 	.is_kabylake = 1,
 	.gen = 9,
 };
 
 static const struct intel_device_info intel_kabylake_gt3_info = {
-	HSW_FEATURES,
+	BDW_FEATURES,
 	.is_preliminary = 1,
 	.is_kabylake = 1,
 	.gen = 9,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6634c09..7627a4e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -660,6 +660,7 @@  struct drm_i915_display_funcs {
 	/* display clock increase/decrease */
 	/* pll clock increase/decrease */
 
+	void (*load_csc_matrix)(struct drm_crtc *crtc);
 	void (*load_luts)(struct drm_crtc *crtc);
 };
 
@@ -808,6 +809,11 @@  struct intel_device_info {
 	u8 has_slice_pg:1;
 	u8 has_subslice_pg:1;
 	u8 has_eu_pg:1;
+
+	struct color_luts {
+		u16 degamma_lut_size;
+		u16 gamma_lut_size;
+	} color;
 };
 
 #undef DEFINE_FLAG
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3774870..8ce76d7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7650,6 +7650,28 @@  enum skl_disp_power_wells {
 #define PIPE_CSC_POSTOFF_ME(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_ME, _PIPE_B_CSC_POSTOFF_ME)
 #define PIPE_CSC_POSTOFF_LO(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_LO, _PIPE_B_CSC_POSTOFF_LO)
 
+/* pipe degamma/gamma LUTs on IVB+ */
+#define _PAL_PREC_INDEX_A	0x4A400
+#define _PAL_PREC_INDEX_B	0x4AC00
+#define _PAL_PREC_INDEX_C	0x4B400
+#define   PAL_PREC_10_12_BIT		(0 << 31)
+#define   PAL_PREC_SPLIT_MODE		(1 << 31)
+#define   PAL_PREC_AUTO_INCREMENT	(1 << 15)
+#define _PAL_PREC_DATA_A	0x4A404
+#define _PAL_PREC_DATA_B	0x4AC04
+#define _PAL_PREC_DATA_C	0x4B404
+#define _PAL_PREC_GC_MAX_A	0x4A410
+#define _PAL_PREC_GC_MAX_B	0x4AC10
+#define _PAL_PREC_GC_MAX_C	0x4B410
+#define _PAL_PREC_EXT_GC_MAX_A	0x4A420
+#define _PAL_PREC_EXT_GC_MAX_B	0x4AC20
+#define _PAL_PREC_EXT_GC_MAX_C	0x4B420
+
+#define PREC_PAL_INDEX(pipe)		_MMIO_PIPE(pipe, _PAL_PREC_INDEX_A, _PAL_PREC_INDEX_B)
+#define PREC_PAL_DATA(pipe)		_MMIO_PIPE(pipe, _PAL_PREC_DATA_A, _PAL_PREC_DATA_B)
+#define PREC_PAL_GC_MAX(pipe, i)	_MMIO(_PIPE(pipe, _PAL_PREC_GC_MAX_A, _PAL_PREC_GC_MAX_B) + (i) * 4)
+#define PREC_PAL_EXT_GC_MAX(pipe, i)	_MMIO(_PIPE(pipe, _PAL_PREC_EXT_GC_MAX_A, _PAL_PREC_EXT_GC_MAX_B) + (i) * 4)
+
 /* MIPI DSI registers */
 
 #define _MIPI_PORT(port, a, c)	_PORT3(port, a, 0, c)	/* ports A and C only */
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index ba27ce2..a6c7d22 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -24,44 +24,172 @@ 
 
 #include "intel_drv.h"
 
+#define CTM_COEFF_SIGN	(1ULL << 63)
+
+#define CTM_COEFF_1_0	(1ULL << 32)
+#define CTM_COEFF_2_0	(CTM_COEFF_1_0 << 1)
+#define CTM_COEFF_4_0	(CTM_COEFF_2_0 << 1)
+#define CTM_COEFF_0_5	(CTM_COEFF_1_0 >> 1)
+#define CTM_COEFF_0_25	(CTM_COEFF_0_5 >> 1)
+#define CTM_COEFF_0_125	(CTM_COEFF_0_25 >> 1)
+
+#define CTM_COEFF_LIMITED_RANGE ((235ULL - 16ULL) * CTM_COEFF_1_0 / 255)
+
+#define CTM_COEFF_NEGATIVE(coeff)	(((coeff) & CTM_COEFF_SIGN) != 0)
+#define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
+
+#define LEGACY_LUT_LENGTH		(sizeof(struct drm_color_lut) * 256)
+
 /*
- * Set up the pipe CSC unit.
+ * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
+ * format). This macro takes the coefficient we want transformed and the
+ * number of fractional bits.
  *
- * Currently only full range RGB to limited range RGB conversion
- * is supported, but eventually this should handle various
- * RGB<->YCbCr scenarios as well.
+ * We only have a 9 bits precision window which slides depending on the value
+ * of the CTM coefficient and we write the value from bit 3. We also round the
+ * value.
  */
+#define I9XX_CSC_COEFF_FP(coeff, fbits)	\
+	(clamp_val(((coeff) >> (32 - (fbits) - 3)) + 4, 0, 0xfff) & 0xff8)
+
+#define I9XX_CSC_COEFF_LIMITED_RANGE	\
+	I9XX_CSC_COEFF_FP(CTM_COEFF_LIMITED_RANGE, 9)
+#define I9XX_CSC_COEFF_1_0		\
+	((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8))
+
+static bool crtc_state_is_legacy(struct drm_crtc_state *state)
+{
+	return !state->degamma_lut &&
+		!state->ctm &&
+		state->gamma_lut &&
+		state->gamma_lut->length == LEGACY_LUT_LENGTH;
+}
+
+/*
+ * When using limited range, multiply the matrix given by userspace by
+ * the matrix that we would use for the limited range. We do the
+ * multiplication in U2.30 format.
+ */
+static void ctm_mult_by_limited(uint64_t *result, int64_t *input)
+{
+	int i, j;
+	uint64_t limited_coeffs[9] = { CTM_COEFF_LIMITED_RANGE, 0, 0,
+				       0, CTM_COEFF_LIMITED_RANGE, 0,
+				       0, 0, CTM_COEFF_LIMITED_RANGE };
+
+	for (i = 0; i < ARRAY_SIZE(limited_coeffs); i++) {
+		int column = i % 3, row = i / 3;
+		int negative = 0;
+
+		input[i] = 0;
+		for (j = 0; j < 3; j++) {
+			int64_t user_coeff = input[j * 3 + column];
+			uint64_t limited_coeff =
+				limited_coeffs[row * 3 + j] >> 2;
+			uint64_t abs_coeff =
+				clamp_val(CTM_COEFF_ABS(user_coeff),
+					  0,
+					  CTM_COEFF_4_0 - 1) >> 2;
+
+			if (CTM_COEFF_NEGATIVE(user_coeff))
+				negative = !negative;
+			result[i] += limited_coeff * abs_coeff;
+		}
+
+		result[i] >>= 27;
+		if (negative)
+			result[i] |= CTM_COEFF_SIGN;
+	}
+}
+
+/* Set up the pipe CSC unit. */
 static void i9xx_load_csc_matrix(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
+	struct drm_crtc_state *crtc_state = crtc->state;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	uint16_t coeff = 0x7800; /* 1.0 */
+	int i, pipe = intel_crtc->pipe;
+	uint16_t coeffs[9] = { 0, };
 
-	/*
-	 * TODO: Check what kind of values actually come out of the pipe
-	 * with these coeff/postoff values and adjust to get the best
-	 * accuracy. Perhaps we even need to take the bpc value into
-	 * consideration.
-	 */
+	if (crtc_state->ctm) {
+		struct drm_color_ctm *ctm =
+			(struct drm_color_ctm *)crtc_state->ctm->data;
+		uint64_t input[9] = { 0, };
 
-	if (intel_crtc->config->limited_color_range)
-		coeff = ((235 - 16) * (1 << 12) / 255) & 0xff8; /* 0.xxx... */
+		if (intel_crtc->config->limited_color_range)
+			ctm_mult_by_limited(input, ctm->matrix);
+		else {
+			for (i = 0; i < ARRAY_SIZE(input); i++)
+				input[i] = ctm->matrix[i];
+		}
+
+		/*
+		 * Convert fixed point S31.32 input to format supported by the
+		 * hardware.
+		 */
+		for (i = 0; i < ARRAY_SIZE(coeffs); i++) {
+			uint64_t abs_coeff = ((1ULL << 63) - 1) & input[i];
+
+			/*
+			 * Clamp input value to min/max supported by
+			 * hardware.
+			 */
+			abs_coeff = clamp_val(abs_coeff, 0, CTM_COEFF_4_0 - 1);
+
+			/* sign bit */
+			if (CTM_COEFF_NEGATIVE(input[i]))
+				coeffs[i] |= 1 << 15;
+
+			if (abs_coeff < CTM_COEFF_0_125)
+				coeffs[i] |= (3 << 12) |
+					I9XX_CSC_COEFF_FP(abs_coeff, 12);
+			else if (abs_coeff < CTM_COEFF_0_25)
+				coeffs[i] |= (2 << 12) |
+					I9XX_CSC_COEFF_FP(abs_coeff, 11);
+			else if (abs_coeff < CTM_COEFF_0_5)
+				coeffs[i] |= (1 << 12) |
+					I9XX_CSC_COEFF_FP(abs_coeff, 10);
+			else if (abs_coeff < CTM_COEFF_1_0)
+				coeffs[i] |= I9XX_CSC_COEFF_FP(abs_coeff, 9);
+			else if (abs_coeff < CTM_COEFF_2_0)
+				coeffs[i] |= (7 << 12) |
+					I9XX_CSC_COEFF_FP(abs_coeff, 8);
+			else
+				coeffs[i] |= (6 << 12) |
+					I9XX_CSC_COEFF_FP(abs_coeff, 7);
+		}
+	} else {
+		/*
+		 * Load an identify matrix if no coefficients are provided.
+		 *
+		 * TODO: Check what kind of values actually come out of the
+		 * pipe with these coeff/postoff values and adjust to get the
+		 * best accuracy. Perhaps we even need to take the bpc value
+		 * into consideration.
+		 */
+		for (i = 0; i < 3; i++) {
+			if (intel_crtc->config->limited_color_range)
+				coeffs[i * 3 + i] =
+					I9XX_CSC_COEFF_LIMITED_RANGE;
+			else
+				coeffs[i * 3 + i] = I9XX_CSC_COEFF_1_0;
+		}
+	}
 
 	/*
 	 * GY/GU and RY/RU should be the other way around according
 	 * to BSpec, but reality doesn't agree. Just set them up in
 	 * a way that results in the correct picture.
 	 */
-	I915_WRITE(PIPE_CSC_COEFF_RY_GY(pipe), coeff << 16);
-	I915_WRITE(PIPE_CSC_COEFF_BY(pipe), 0);
+	I915_WRITE(PIPE_CSC_COEFF_RY_GY(pipe), coeffs[0] << 16 | coeffs[1]);
+	I915_WRITE(PIPE_CSC_COEFF_BY(pipe), coeffs[2] << 16);
 
-	I915_WRITE(PIPE_CSC_COEFF_RU_GU(pipe), coeff);
-	I915_WRITE(PIPE_CSC_COEFF_BU(pipe), 0);
+	I915_WRITE(PIPE_CSC_COEFF_RU_GU(pipe), coeffs[3] << 16 | coeffs[4]);
+	I915_WRITE(PIPE_CSC_COEFF_BU(pipe), coeffs[5] << 16);
 
-	I915_WRITE(PIPE_CSC_COEFF_RV_GV(pipe), 0);
-	I915_WRITE(PIPE_CSC_COEFF_BV(pipe), coeff << 16);
+	I915_WRITE(PIPE_CSC_COEFF_RV_GV(pipe), coeffs[6] << 16 | coeffs[7]);
+	I915_WRITE(PIPE_CSC_COEFF_BV(pipe), coeffs[8] << 16);
 
 	I915_WRITE(PIPE_CSC_PREOFF_HI(pipe), 0);
 	I915_WRITE(PIPE_CSC_PREOFF_ME(pipe), 0);
@@ -90,13 +218,18 @@  static void i9xx_load_csc_matrix(struct drm_crtc *crtc)
 
 void intel_color_set_csc(struct drm_crtc *crtc)
 {
-	i9xx_load_csc_matrix(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (dev_priv->display.load_csc_matrix)
+		dev_priv->display.load_csc_matrix(crtc);
 }
 
-/* Loads the palette/gamma unit for the CRTC with the prepared values. */
+/* Loads the legacy palette/gamma unit for the CRTC. */
 static void i9xx_load_luts(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
+	struct drm_crtc_state *state = crtc->state;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum pipe pipe = intel_crtc->pipe;
@@ -109,18 +242,33 @@  static void i9xx_load_luts(struct drm_crtc *crtc)
 			assert_pll_enabled(dev_priv, pipe);
 	}
 
-	for (i = 0; i < 256; i++) {
-		uint32_t word = (intel_crtc->lut_r[i] << 16) |
-			(intel_crtc->lut_g[i] << 8) |
-			intel_crtc->lut_b[i];
-		if (HAS_GMCH_DISPLAY(dev))
-			I915_WRITE(PALETTE(pipe, i), word);
-		else
-			I915_WRITE(LGC_PALETTE(pipe, i), word);
+	if (state->gamma_lut) {
+		struct drm_color_lut *lut =
+			(struct drm_color_lut *) state->gamma_lut->data;
+		for (i = 0; i < 256; i++) {
+			uint32_t word =
+				(drm_color_lut_extract(lut[i].red, 8) << 16) |
+				(drm_color_lut_extract(lut[i].green, 8) << 8) |
+				drm_color_lut_extract(lut[i].blue, 8);
+
+			if (HAS_GMCH_DISPLAY(dev))
+				I915_WRITE(PALETTE(pipe, i), word);
+			else
+				I915_WRITE(LGC_PALETTE(pipe, i), word);
+		}
+	} else {
+		for (i = 0; i < 256; i++) {
+			uint32_t word = (i << 16) | (i << 8) | i;
+
+			if (HAS_GMCH_DISPLAY(dev))
+				I915_WRITE(PALETTE(pipe, i), word);
+			else
+				I915_WRITE(LGC_PALETTE(pipe, i), word);
+		}
 	}
 }
 
-/* Loads the legacy palette/gamma unit for the CRTC on Haswell+. */
+/* Loads the legacy palette/gamma unit for the CRTC on Haswell. */
 static void haswell_load_luts(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -149,6 +297,89 @@  static void haswell_load_luts(struct drm_crtc *crtc)
 		hsw_enable_ips(intel_crtc);
 }
 
+/* Loads the palette/gamma unit for the CRTC on Broadwell+. */
+static void broadwell_load_luts(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_crtc_state *state = crtc->state;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc_state *intel_state = to_intel_crtc_state(state);
+	enum pipe pipe = to_intel_crtc(crtc)->pipe;
+	uint32_t i, lut_size = INTEL_INFO(dev)->color.degamma_lut_size;
+
+	if (crtc_state_is_legacy(state)) {
+		haswell_load_luts(crtc);
+		return;
+	}
+
+	I915_WRITE(PREC_PAL_INDEX(pipe),
+		   PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT);
+
+	if (state->degamma_lut) {
+		struct drm_color_lut *lut =
+			(struct drm_color_lut *) state->degamma_lut->data;
+
+		for (i = 0; i < lut_size; i++) {
+			uint32_t word =
+			drm_color_lut_extract(lut[i].red, 10) << 20 |
+			drm_color_lut_extract(lut[i].green, 10) << 10 |
+			drm_color_lut_extract(lut[i].blue, 10);
+
+			I915_WRITE(PREC_PAL_DATA(pipe), word);
+		}
+	} else {
+		for (i = 0; i < lut_size; i++) {
+			uint32_t v = (i * ((1 << 10) - 1)) / (lut_size - 1);
+
+			I915_WRITE(PREC_PAL_DATA(pipe),
+				   (v << 20) | (v << 10) | v);
+		}
+	}
+
+	if (state->gamma_lut) {
+		struct drm_color_lut *lut =
+			(struct drm_color_lut *) state->gamma_lut->data;
+
+		for (i = 0; i < lut_size; i++) {
+			uint32_t word =
+			(drm_color_lut_extract(lut[i].red, 10) << 20) |
+			(drm_color_lut_extract(lut[i].green, 10) << 10) |
+			drm_color_lut_extract(lut[i].blue, 10);
+
+			I915_WRITE(PREC_PAL_DATA(pipe), word);
+		}
+
+		/* Program the max register to clamp values > 1.0. */
+		I915_WRITE(PREC_PAL_GC_MAX(pipe, 0),
+			   drm_color_lut_extract(lut[i].red, 16));
+		I915_WRITE(PREC_PAL_GC_MAX(pipe, 1),
+			   drm_color_lut_extract(lut[i].green, 16));
+		I915_WRITE(PREC_PAL_GC_MAX(pipe, 2),
+			   drm_color_lut_extract(lut[i].blue, 16));
+	} else {
+		for (i = 0; i < lut_size; i++) {
+			uint32_t v = (i * ((1 << 10) - 1)) / (lut_size - 1);
+
+			I915_WRITE(PREC_PAL_DATA(pipe),
+				   (v << 20) | (v << 10) | v);
+		}
+
+		I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16) - 1);
+		I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1);
+		I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1);
+	}
+
+	intel_state->gamma_mode = GAMMA_MODE_MODE_SPLIT;
+	I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT);
+	POSTING_READ(GAMMA_MODE(pipe));
+
+	/*
+	 * Reset the index, otherwise it prevents the legacy palette to be
+	 * written properly.
+	 */
+	I915_WRITE(PREC_PAL_INDEX(pipe), 0);
+}
+
 void intel_color_load_luts(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -161,38 +392,61 @@  void intel_color_load_luts(struct drm_crtc *crtc)
 	dev_priv->display.load_luts(crtc);
 }
 
-void intel_color_legacy_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				  u16 *blue, uint32_t start, uint32_t size)
+int intel_color_check(struct drm_crtc *crtc,
+		      struct drm_crtc_state *crtc_state)
 {
-	int end = (start + size > 256) ? 256 : start + size, i;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	size_t gamma_length, degamma_length;
 
-	for (i = start; i < end; i++) {
-		intel_crtc->lut_r[i] = red[i] >> 8;
-		intel_crtc->lut_g[i] = green[i] >> 8;
-		intel_crtc->lut_b[i] = blue[i] >> 8;
-	}
+	degamma_length = INTEL_INFO(dev)->color.degamma_lut_size *
+		sizeof(struct drm_color_lut);
+	gamma_length = INTEL_INFO(dev)->color.gamma_lut_size *
+		sizeof(struct drm_color_lut);
+
+	/*
+	 * We allow both degamma & gamma luts at the right size or
+	 * NULL.
+	 */
+	if ((!crtc_state->degamma_lut ||
+	     crtc_state->degamma_lut->length == degamma_length) &&
+	    (!crtc_state->gamma_lut ||
+	     crtc_state->gamma_lut->length == gamma_length))
+		return 0;
+
+	/*
+	 * We also allow no degamma lut and a gamma lut at the legacy
+	 * size (256 entries).
+	 */
+	if (!crtc_state->degamma_lut &&
+	    crtc_state->gamma_lut &&
+	    crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH)
+		return 0;
 
-	intel_color_load_luts(crtc);
+	return -EINVAL;
 }
 
 void intel_color_init(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int i;
 
 	drm_mode_crtc_set_gamma_size(crtc, 256);
-	for (i = 0; i < 256; i++) {
-		intel_crtc->lut_r[i] = i;
-		intel_crtc->lut_g[i] = i;
-		intel_crtc->lut_b[i] = i;
-	}
 
-	if (IS_HASWELL(dev) ||
-	    (INTEL_INFO(dev)->gen >= 8 && !IS_CHERRYVIEW(dev)))
+	if (IS_HASWELL(dev)) {
+		dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix;
 		dev_priv->display.load_luts = haswell_load_luts;
-	else
+	} else if (IS_BROADWELL(dev) || IS_SKYLAKE(dev) ||
+		   IS_BROXTON(dev) || IS_KABYLAKE(dev)) {
+		dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix;
+		dev_priv->display.load_luts = broadwell_load_luts;
+	} else {
 		dev_priv->display.load_luts = i9xx_load_luts;
+	}
+
+	/* Enable color management support when we have degamma & gamma LUTs. */
+	if (INTEL_INFO(dev)->color.degamma_lut_size != 0 &&
+	    INTEL_INFO(dev)->color.gamma_lut_size != 0)
+		drm_helper_crtc_enable_color_mgmt(crtc,
+					INTEL_INFO(dev)->color.degamma_lut_size,
+					INTEL_INFO(dev)->color.gamma_lut_size);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index acbb1d9..1aafe00 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11846,6 +11846,12 @@  static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 			return ret;
 	}
 
+	if (crtc_state->color_mgmt_changed) {
+		ret = intel_color_check(crtc, crtc_state);
+		if (ret)
+			return ret;
+	}
+
 	ret = 0;
 	if (dev_priv->display.compute_pipe_wm) {
 		ret = dev_priv->display.compute_pipe_wm(intel_crtc, state);
@@ -11867,7 +11873,6 @@  static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 
 static const struct drm_crtc_helper_funcs intel_helper_funcs = {
 	.mode_set_base_atomic = intel_pipe_set_base_atomic,
-	.load_lut = intel_color_load_luts,
 	.atomic_begin = intel_begin_crtc_commit,
 	.atomic_flush = intel_finish_crtc_commit,
 	.atomic_check = intel_crtc_atomic_check,
@@ -13398,6 +13403,18 @@  static int intel_atomic_commit(struct drm_device *dev,
 			hw_check = true;
 		}
 
+		if (!modeset &&
+		    crtc->state->active &&
+		    crtc->state->color_mgmt_changed) {
+			/*
+			 * Only update color management when not doing
+			 * a modeset as this will be done by
+			 * crtc_enable already.
+			 */
+			intel_color_set_csc(crtc);
+			intel_color_load_luts(crtc);
+		}
+
 		if (!modeset)
 			intel_pre_plane_update(to_intel_crtc_state(crtc_state));
 
@@ -13487,8 +13504,9 @@  out:
 #undef for_each_intel_crtc_masked
 
 static const struct drm_crtc_funcs intel_crtc_funcs = {
-	.gamma_set = intel_color_legacy_gamma_set,
+	.gamma_set = drm_atomic_helper_legacy_gamma_set,
 	.set_config = drm_atomic_helper_set_config,
+	.set_property = drm_atomic_helper_crtc_set_property,
 	.destroy = intel_crtc_destroy,
 	.page_flip = intel_crtc_page_flip,
 	.atomic_duplicate_state = intel_crtc_duplicate_state,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 9742d5b..3082dbe 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1624,9 +1624,8 @@  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
 
 /* intel_color.c */
 void intel_color_init(struct drm_crtc *crtc);
+int intel_color_check(struct drm_crtc *crtc, struct drm_crtc_state *state);
 void intel_color_set_csc(struct drm_crtc *crtc);
 void intel_color_load_luts(struct drm_crtc *crtc);
-void intel_color_legacy_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				  u16 *blue, uint32_t start, uint32_t size);
 
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 97a91e6..777f98c 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -379,6 +379,7 @@  retry:
 		struct drm_connector *connector;
 		struct drm_encoder *encoder;
 		struct drm_fb_helper_crtc *new_crtc;
+		struct intel_crtc *intel_crtc;
 
 		fb_conn = fb_helper->connector_info[i];
 		connector = fb_conn->connector;
@@ -420,6 +421,13 @@  retry:
 
 		num_connectors_enabled++;
 
+		intel_crtc = to_intel_crtc(connector->state->crtc);
+		for (j = 0; j < 256; j++) {
+			intel_crtc->lut_r[j] = j;
+			intel_crtc->lut_g[j] = j;
+			intel_crtc->lut_b[j] = j;
+		}
+
 		new_crtc = intel_fb_helper_crtc(fb_helper, connector->state->crtc);
 
 		/*