diff mbox

[v3,6/9] drm/i915/skl+: change WM calc to fixed point 16.16

Message ID 20160909080106.17506-7-mahesh1.kumar@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kumar, Mahesh Sept. 9, 2016, 8:01 a.m. UTC
From: Mahesh Kumar <mahesh1.kumar@intel.com>

This patch changes Watermak calculation to fixed point calculation.
Problem with current calculation is during plane_blocks_per_line
calculation we divide intermediate blocks with min_scanlines and
takes floor of the result because of integer operation.
hence we end-up assigning less blocks than required. Which leads to
flickers.

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

Comments

Zanoni, Paulo R Sept. 21, 2016, 6:32 p.m. UTC | #1
Hi

Em Sex, 2016-09-09 às 13:31 +0530, Kumar, Mahesh escreveu:
> From: Mahesh Kumar <mahesh1.kumar@intel.com>

First of all, good catch with this patch!

> 
> This patch changes Watermak calculation to fixed point calculation.
> Problem with current calculation is during plane_blocks_per_line
> calculation we divide intermediate blocks with min_scanlines and
> takes floor of the result because of integer operation.
> hence we end-up assigning less blocks than required. Which leads to
> flickers.

This problem is that the patch doesn't only do this. It also tries to
make the method1 result be a fixed point 16.16, and it also does a
completely unrelated change with the addition of the y_tiled variable.
I'd really like of the 3 changes were 3 different patches.


> 
> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 23 ++++++++++++++---------
>  1 file changed, 14 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 0ec328b..d4390e4 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3530,13 +3530,15 @@ static uint32_t skl_pipe_pixel_rate(const
> struct intel_crtc_state *config)
>  */
>  static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp,
> uint32_t latency)
>  {
> -	uint32_t wm_intermediate_val, ret;
> +	uint64_t wm_intermediate_val;
> +	uint32_t ret;
>  
>  	if (latency == 0)
>  		return UINT_MAX;
>  
> -	wm_intermediate_val = latency * pixel_rate * cpp / 512;
> -	ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
> +	wm_intermediate_val = latency * pixel_rate * cpp;
> +	wm_intermediate_val <<= 16;
> +	ret = DIV_ROUND_UP_ULL(wm_intermediate_val, 1000 * 512);
>  
>  	return ret;
>  }

So shouldn't we fix the callers of skl_wm_method1 to take into
consideration that the value returned is now a fixed point 16.16?
Sounds like we have a bug here.

Also, having method1 be 16.16 and method2 be a normal integer adds a
lot to the confusion.


> @@ -3605,6 +3607,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  	uint16_t *out_blocks = &result->plane_res_b[id];
>  	uint8_t *out_lines = &result->plane_res_l[id];
>  	enum watermark_memory_wa mem_wa;
> +	bool y_tiled = false;

Please either discard these y_tiled chunks or move them to a separate
patch. And since we have the y_tiled variable, maybe we'd also like to
have an x_tiled variable for consistency between the checks?


>  
>  	if (latency == 0 || !cstate->base.active || !intel_pstate-
> >base.visible)
>  		return 0;
> @@ -3652,14 +3655,18 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  	plane_bytes_per_line = width * cpp;
>  	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>  	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> +		y_tiled = true;
>  		plane_blocks_per_line =
>  		      DIV_ROUND_UP(plane_bytes_per_line *
> y_min_scanlines, 512);
> -		plane_blocks_per_line /= y_min_scanlines;
> +		plane_blocks_per_line = (plane_blocks_per_line <<
> 16) /
> +								y_mi
> n_scanlines;
>  	} else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) {
>  		plane_blocks_per_line =
> DIV_ROUND_UP(plane_bytes_per_line, 512)
>  					+ 1;
> +		plane_blocks_per_line <<= 16;
>  	} else {
>  		plane_blocks_per_line =
> DIV_ROUND_UP(plane_bytes_per_line, 512);
> +		plane_blocks_per_line <<= 16;
>  	}

This is probably a rebase problem, but not all places where
plane_blocks_per_line are used were fixed to take into consideration
the new 16.16 format.


Now, what I've been thinking is that we completely fail at type-safety
when we mix these normal integers with the 16.16 integers. Ideally, we
should find a way to make the compiler complain about this to us, since
it's too easy to make such mistakes. Can't we try to make the compiler
help us, such as by defining something like

typedef struct {
	uint32_t val;
} uint_fixed_16_16_t;

and then making the appropriate functions/macros for maniuplating them
and mixing them with integers? This would help catching problems such
as the one we have here. Also, other pieces of i915.ko and drm.ko could
benefit from this.

>  
>  	method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
> @@ -3670,8 +3677,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  
>  	y_tile_minimum = plane_blocks_per_line * y_min_scanlines;
>  
> -	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
> -	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> +	if (y_tiled) {
>  		selected_result = max(method2, y_tile_minimum);
>  	} else {
>  		uint32_t linetime_us = 0;
> @@ -3688,12 +3694,11 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  			selected_result = method1;
>  	}
>  
> -	res_blocks = selected_result + 1;
> +	res_blocks = DIV_ROUND_UP(selected_result, 1 << 16) + 1;
>  	res_lines = DIV_ROUND_UP(selected_result,
> plane_blocks_per_line);
>  
>  	if (level >= 1 && level <= 7) {
> -		if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
> -		    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> +		if (y_tiled) {
>  			res_blocks += y_tile_minimum;
>  			res_lines += y_min_scanlines;
>  		} else {
Kumar, Mahesh Sept. 22, 2016, 9:25 a.m. UTC | #2
Hi,


On Thursday 22 September 2016 12:02 AM, Paulo Zanoni wrote:
> Hi
>
> Em Sex, 2016-09-09 às 13:31 +0530, Kumar, Mahesh escreveu:
>> From: Mahesh Kumar <mahesh1.kumar@intel.com>
> First of all, good catch with this patch!
>
>> This patch changes Watermak calculation to fixed point calculation.
>> Problem with current calculation is during plane_blocks_per_line
>> calculation we divide intermediate blocks with min_scanlines and
>> takes floor of the result because of integer operation.
>> hence we end-up assigning less blocks than required. Which leads to
>> flickers.
> This problem is that the patch doesn't only do this. It also tries to
> make the method1 result be a fixed point 16.16, and it also does a
> completely unrelated change with the addition of the y_tiled variable.
> I'd really like of the 3 changes were 3 different patches.
>
agree, y_tiled, change can go separately, this was just, I don't wanted 
to write complete fb->modifier name always :).
But imo making method-1 & method-2 output 16.16 fixed point should go in 
single patch. (plane_blocks_per_line, method_1 & method_2 to 16.16)
>> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_pm.c | 23 ++++++++++++++---------
>>   1 file changed, 14 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>> b/drivers/gpu/drm/i915/intel_pm.c
>> index 0ec328b..d4390e4 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -3530,13 +3530,15 @@ static uint32_t skl_pipe_pixel_rate(const
>> struct intel_crtc_state *config)
>>   */
>>   static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp,
>> uint32_t latency)
>>   {
>> -	uint32_t wm_intermediate_val, ret;
>> +	uint64_t wm_intermediate_val;
>> +	uint32_t ret;
>>   
>>   	if (latency == 0)
>>   		return UINT_MAX;
>>   
>> -	wm_intermediate_val = latency * pixel_rate * cpp / 512;
>> -	ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
>> +	wm_intermediate_val = latency * pixel_rate * cpp;
>> +	wm_intermediate_val <<= 16;
>> +	ret = DIV_ROUND_UP_ULL(wm_intermediate_val, 1000 * 512);
>>   
>>   	return ret;
>>   }
> So shouldn't we fix the callers of skl_wm_method1 to take into
> consideration that the value returned is now a fixed point 16.16?
> Sounds like we have a bug here.
caller is now considering it as 16.16 only
>
> Also, having method1 be 16.16 and method2 be a normal integer adds a
> lot to the confusion.
Actually method-2 output is also 16.16 fixed point. Because we are 
multiplying it with plane_blocks_per_line, which is 16.16.
>
>
>> @@ -3605,6 +3607,7 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   	uint16_t *out_blocks = &result->plane_res_b[id];
>>   	uint8_t *out_lines = &result->plane_res_l[id];
>>   	enum watermark_memory_wa mem_wa;
>> +	bool y_tiled = false;
> Please either discard these y_tiled chunks or move them to a separate
> patch. And since we have the y_tiled variable, maybe we'd also like to
> have an x_tiled variable for consistency between the checks?
will move y_tiled to other patch, if we have another variable for 
x_tiled then we need one for none_tiled also.
I'm not sure how useful it would be.
>
>
>>   
>>   	if (latency == 0 || !cstate->base.active || !intel_pstate-
>>> base.visible)
>>   		return 0;
>> @@ -3652,14 +3655,18 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   	plane_bytes_per_line = width * cpp;
>>   	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>>   	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
>> +		y_tiled = true;
>>   		plane_blocks_per_line =
>>   		      DIV_ROUND_UP(plane_bytes_per_line *
>> y_min_scanlines, 512);
>> -		plane_blocks_per_line /= y_min_scanlines;
>> +		plane_blocks_per_line = (plane_blocks_per_line <<
>> 16) /
>> +								y_mi
>> n_scanlines;
>>   	} else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) {
>>   		plane_blocks_per_line =
>> DIV_ROUND_UP(plane_bytes_per_line, 512)
>>   					+ 1;
>> +		plane_blocks_per_line <<= 16;
>>   	} else {
>>   		plane_blocks_per_line =
>> DIV_ROUND_UP(plane_bytes_per_line, 512);
>> +		plane_blocks_per_line <<= 16;
>>   	}
> This is probably a rebase problem, but not all places where
> plane_blocks_per_line are used were fixed to take into consideration
> the new 16.16 format.
>
>
> Now, what I've been thinking is that we completely fail at type-safety
> when we mix these normal integers with the 16.16 integers. Ideally, we
> should find a way to make the compiler complain about this to us, since
> it's too easy to make such mistakes. Can't we try to make the compiler
> help us, such as by defining something like
>
> typedef struct {
> 	uint32_t val;
> } uint_fixed_16_16_t;
sounds good to have different type for fixed_16_16.

Regards,
-Mahesh
>
> and then making the appropriate functions/macros for maniuplating them
> and mixing them with integers? This would help catching problems such
> as the one we have here. Also, other pieces of i915.ko and drm.ko could
> benefit from this.
>
>>   
>>   	method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
>> @@ -3670,8 +3677,7 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   
>>   	y_tile_minimum = plane_blocks_per_line * y_min_scanlines;
>>   
>> -	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>> -	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
>> +	if (y_tiled) {
>>   		selected_result = max(method2, y_tile_minimum);
>>   	} else {
>>   		uint32_t linetime_us = 0;
>> @@ -3688,12 +3694,11 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>>   			selected_result = method1;
>>   	}
>>   
>> -	res_blocks = selected_result + 1;
>> +	res_blocks = DIV_ROUND_UP(selected_result, 1 << 16) + 1;
>>   	res_lines = DIV_ROUND_UP(selected_result,
>> plane_blocks_per_line);
>>   
>>   	if (level >= 1 && level <= 7) {
>> -		if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
>> -		    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
>> +		if (y_tiled) {
>>   			res_blocks += y_tile_minimum;
>>   			res_lines += y_min_scanlines;
>>   		} else {
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0ec328b..d4390e4 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3530,13 +3530,15 @@  static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
 */
 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
 {
-	uint32_t wm_intermediate_val, ret;
+	uint64_t wm_intermediate_val;
+	uint32_t ret;
 
 	if (latency == 0)
 		return UINT_MAX;
 
-	wm_intermediate_val = latency * pixel_rate * cpp / 512;
-	ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
+	wm_intermediate_val = latency * pixel_rate * cpp;
+	wm_intermediate_val <<= 16;
+	ret = DIV_ROUND_UP_ULL(wm_intermediate_val, 1000 * 512);
 
 	return ret;
 }
@@ -3605,6 +3607,7 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	uint16_t *out_blocks = &result->plane_res_b[id];
 	uint8_t *out_lines = &result->plane_res_l[id];
 	enum watermark_memory_wa mem_wa;
+	bool y_tiled = false;
 
 	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible)
 		return 0;
@@ -3652,14 +3655,18 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	plane_bytes_per_line = width * cpp;
 	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
 	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
+		y_tiled = true;
 		plane_blocks_per_line =
 		      DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512);
-		plane_blocks_per_line /= y_min_scanlines;
+		plane_blocks_per_line = (plane_blocks_per_line << 16) /
+								y_min_scanlines;
 	} else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) {
 		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512)
 					+ 1;
+		plane_blocks_per_line <<= 16;
 	} else {
 		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
+		plane_blocks_per_line <<= 16;
 	}
 
 	method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
@@ -3670,8 +3677,7 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 
 	y_tile_minimum = plane_blocks_per_line * y_min_scanlines;
 
-	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
-	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
+	if (y_tiled) {
 		selected_result = max(method2, y_tile_minimum);
 	} else {
 		uint32_t linetime_us = 0;
@@ -3688,12 +3694,11 @@  static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 			selected_result = method1;
 	}
 
-	res_blocks = selected_result + 1;
+	res_blocks = DIV_ROUND_UP(selected_result, 1 << 16) + 1;
 	res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
 
 	if (level >= 1 && level <= 7) {
-		if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
-		    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
+		if (y_tiled) {
 			res_blocks += y_tile_minimum;
 			res_lines += y_min_scanlines;
 		} else {