drm/i915: Rewrite vlv_find_best_dpll()

Message ID	1379959390-2686-1-git-send-email-ville.syrjala@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org> From: ville.syrjala@linux.intel.com To: intel-gfx@lists.freedesktop.org Date: Mon, 23 Sep 2013 21:03:10 +0300 Message-Id: <1379959390-2686-1-git-send-email-ville.syrjala@linux.intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH] drm/i915: Rewrite vlv_find_best_dpll() Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Sender: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org

Message ID

1379959390-2686-1-git-send-email-ville.syrjala@linux.intel.com (mailing list archive)

State

New, archived

Headers

From: ville.syrjala@linux.intel.com
To: intel-gfx@lists.freedesktop.org
Date: Mon, 23 Sep 2013 21:03:10 +0300
Message-Id: <1379959390-2686-1-git-send-email-ville.syrjala@linux.intel.com>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH] drm/i915: Rewrite vlv_find_best_dpll()
Precedence: list
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Sender: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org
Errors-To: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org

Commit Message

Ville Syrjälä Sept. 23, 2013, 6:03 p.m. UTC

From: Ville Syrjälä <ville.syrjala@linux.intel.com>

vlv_find_best_dpll() has several integer over/underflow issues,
includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of
variables, some slightly weird math, and it doesn't look very
nice either.

Rather than try to deal with each issue separately I just decided
to rewrite the function a bit.

WARNING: Entirely untested

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++---------------------
 1 file changed, 40 insertions(+), 54 deletions(-)

Comments

Daniel Vetter Sept. 24, 2013, 9:23 a.m. UTC | #1

On Mon, Sep 23, 2013 at 09:03:10PM +0300, ville.syrjala@linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> vlv_find_best_dpll() has several integer over/underflow issues,
> includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of
> variables, some slightly weird math, and it doesn't look very
> nice either.
> 
> Rather than try to deal with each issue separately I just decided
> to rewrite the function a bit.
> 
> WARNING: Entirely untested
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++---------------------
>  1 file changed, 40 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 3b06250..f89fb12 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc,
>  		   int target, int refclk, intel_clock_t *match_clock,
>  		   intel_clock_t *best_clock)
>  {
> -	u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2;
> -	u32 m, n, fastclk;
> -	u32 updrate, minupdate, p;
> -	unsigned long bestppm, ppm, absppm;
> -	int dotclk, flag;
> -
> -	flag = 0;
> -	dotclk = target * 1000;
> -	bestppm = 1000000;
> -	ppm = absppm = 0;
> -	fastclk = dotclk / (2*100);
> -	updrate = 0;
> -	minupdate = 19200;
> -	n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0;
> -	bestm1 = bestm2 = bestp1 = bestp2 = 0;
> +	intel_clock_t clock = {
> +		.dot = target * 5, /* fast clock */
> +	};
> +	unsigned int bestppm = 1000000;
> +	/* min update 19.2 MHz */
> +	int max_n = min(limit->n.max, refclk / 19200);
>  
>  	/* based on hardware requirement, prefer smaller n to precision */
> -	for (n = limit->n.min; n <= ((refclk) / minupdate); n++) {
> -		updrate = refclk / n;
> -		for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) {
> -			for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) {
> -				if (p2 > 10)
> -					p2 = p2 - 1;
> -				p = p1 * p2;
> -				/* based on hardware requirement, prefer bigger m1,m2 values */
> -				for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) {
> -					m2 = (((2*(fastclk * p * n / m1 )) +
> -					       refclk) / (2*refclk));
> -					m = m1 * m2;
> -					vco = updrate * m;
> -					if (vco >= limit->vco.min && vco < limit->vco.max) {
> -						ppm = 1000000 * ((vco / p) - fastclk) / fastclk;
> -						absppm = (ppm > 0) ? ppm : (-ppm);
> -						if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) {
> -							bestppm = 0;
> -							flag = 1;
> -						}
> -						if (absppm < bestppm - 10) {
> -							bestppm = absppm;
> -							flag = 1;
> -						}
> -						if (flag) {
> -							bestn = n;
> -							bestm1 = m1;
> -							bestm2 = m2;
> -							bestp1 = p1;
> -							bestp2 = p2;
> -							flag = 0;
> -						}
> -					}
> -				}
> +	for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) {
> +	for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) {
> +	for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) {

I think that's going to upset the coding style police ;-) I guess it would
be simple to extract a vlv_compute_clock like we have for pnv/i9xx that's
both used here and in the get_clock code from Jesse.
-Daniel


> +		if (clock.p2 > 10)
> +			clock.p2--;
> +		clock.p = clock.p1 * clock.p2;
> +
> +		/* based on hardware requirement, prefer bigger m1,m2 values */
> +		for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) {
> +			unsigned int ppm, diff;
> +
> +			clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n,
> +						     clock.m1 * refclk);
> +			clock.m = clock.m1 * clock.m2;
> +
> +			clock.vco = refclk * clock.m / clock.n;
> +
> +			if (clock.vco < limit->vco.min ||
> +			    clock.vco >= limit->vco.max)
> +				continue;
> +
> +			diff = abs(clock.vco / clock.p - clock.dot);
> +			ppm = div_u64(1000000ULL * diff, clock.dot);
> +
> +			if (ppm < 100 && clock.p > best_clock->p) {
> +				bestppm = 0;
> +				*best_clock = clock;
> +			}
> +
> +			if (ppm + 10 < bestppm) {
> +				bestppm = ppm;
> +				*best_clock = clock;
>  			}
>  		}
>  	}
> -	best_clock->n = bestn;
> -	best_clock->m1 = bestm1;
> -	best_clock->m2 = bestm2;
> -	best_clock->p1 = bestp1;
> -	best_clock->p2 = bestp2;
> +	}
> +	}
>  
>  	return true;
>  }
> -- 
> 1.8.1.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Ville Syrjälä Sept. 24, 2013, 11:15 a.m. UTC | #2

On Tue, Sep 24, 2013 at 11:23:31AM +0200, Daniel Vetter wrote:
> On Mon, Sep 23, 2013 at 09:03:10PM +0300, ville.syrjala@linux.intel.com wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > vlv_find_best_dpll() has several integer over/underflow issues,
> > includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of
> > variables, some slightly weird math, and it doesn't look very
> > nice either.
> > 
> > Rather than try to deal with each issue separately I just decided
> > to rewrite the function a bit.
> > 
> > WARNING: Entirely untested
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++---------------------
> >  1 file changed, 40 insertions(+), 54 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 3b06250..f89fb12 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc,
> >  		   int target, int refclk, intel_clock_t *match_clock,
> >  		   intel_clock_t *best_clock)
> >  {
> > -	u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2;
> > -	u32 m, n, fastclk;
> > -	u32 updrate, minupdate, p;
> > -	unsigned long bestppm, ppm, absppm;
> > -	int dotclk, flag;
> > -
> > -	flag = 0;
> > -	dotclk = target * 1000;
> > -	bestppm = 1000000;
> > -	ppm = absppm = 0;
> > -	fastclk = dotclk / (2*100);
> > -	updrate = 0;
> > -	minupdate = 19200;
> > -	n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0;
> > -	bestm1 = bestm2 = bestp1 = bestp2 = 0;
> > +	intel_clock_t clock = {
> > +		.dot = target * 5, /* fast clock */
> > +	};
> > +	unsigned int bestppm = 1000000;
> > +	/* min update 19.2 MHz */
> > +	int max_n = min(limit->n.max, refclk / 19200);
> >  
> >  	/* based on hardware requirement, prefer smaller n to precision */
> > -	for (n = limit->n.min; n <= ((refclk) / minupdate); n++) {
> > -		updrate = refclk / n;
> > -		for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) {
> > -			for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) {
> > -				if (p2 > 10)
> > -					p2 = p2 - 1;
> > -				p = p1 * p2;
> > -				/* based on hardware requirement, prefer bigger m1,m2 values */
> > -				for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) {
> > -					m2 = (((2*(fastclk * p * n / m1 )) +
> > -					       refclk) / (2*refclk));
> > -					m = m1 * m2;
> > -					vco = updrate * m;
> > -					if (vco >= limit->vco.min && vco < limit->vco.max) {
> > -						ppm = 1000000 * ((vco / p) - fastclk) / fastclk;
> > -						absppm = (ppm > 0) ? ppm : (-ppm);
> > -						if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) {
> > -							bestppm = 0;
> > -							flag = 1;
> > -						}
> > -						if (absppm < bestppm - 10) {
> > -							bestppm = absppm;
> > -							flag = 1;
> > -						}
> > -						if (flag) {
> > -							bestn = n;
> > -							bestm1 = m1;
> > -							bestm2 = m2;
> > -							bestp1 = p1;
> > -							bestp2 = p2;
> > -							flag = 0;
> > -						}
> > -					}
> > -				}
> > +	for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) {
> > +	for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) {
> > +	for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) {
> 
> I think that's going to upset the coding style police ;-)

I suppose. But I think it's a semi-decent way of avoiding deep nesting
in these loops-within-loops situations. Obviously if there's any code
other than the internal loop contained in the outer loop, I would not
use it. But I'm not really attached to this approach, so I'm fine with
indenting each loop if that's what people prefer.

BTW now that I look at the code again, I'm wondering why we're checking
'p1 > p1.min' instead of 'p1 >= p1.min'?

> I guess it would
> be simple to extract a vlv_compute_clock like we have for pnv/i9xx that's
> both used here and in the get_clock code from Jesse.

Right. I can do that.

> -Daniel
> 
> 
> > +		if (clock.p2 > 10)
> > +			clock.p2--;
> > +		clock.p = clock.p1 * clock.p2;
> > +
> > +		/* based on hardware requirement, prefer bigger m1,m2 values */
> > +		for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) {
> > +			unsigned int ppm, diff;
> > +
> > +			clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n,
> > +						     clock.m1 * refclk);
> > +			clock.m = clock.m1 * clock.m2;
> > +
> > +			clock.vco = refclk * clock.m / clock.n;
> > +
> > +			if (clock.vco < limit->vco.min ||
> > +			    clock.vco >= limit->vco.max)
> > +				continue;
> > +
> > +			diff = abs(clock.vco / clock.p - clock.dot);
> > +			ppm = div_u64(1000000ULL * diff, clock.dot);
> > +
> > +			if (ppm < 100 && clock.p > best_clock->p) {
> > +				bestppm = 0;
> > +				*best_clock = clock;
> > +			}
> > +
> > +			if (ppm + 10 < bestppm) {
> > +				bestppm = ppm;
> > +				*best_clock = clock;
> >  			}
> >  		}
> >  	}
> > -	best_clock->n = bestn;
> > -	best_clock->m1 = bestm1;
> > -	best_clock->m2 = bestm2;
> > -	best_clock->p1 = bestp1;
> > -	best_clock->p2 = bestp2;
> > +	}
> > +	}
> >  
> >  	return true;
> >  }
> > -- 
> > 1.8.1.5
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3b06250..f89fb12 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -670,65 +670,51 @@  vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc,
 		   int target, int refclk, intel_clock_t *match_clock,
 		   intel_clock_t *best_clock)
 {
-	u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2;
-	u32 m, n, fastclk;
-	u32 updrate, minupdate, p;
-	unsigned long bestppm, ppm, absppm;
-	int dotclk, flag;
-
-	flag = 0;
-	dotclk = target * 1000;
-	bestppm = 1000000;
-	ppm = absppm = 0;
-	fastclk = dotclk / (2*100);
-	updrate = 0;
-	minupdate = 19200;
-	n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0;
-	bestm1 = bestm2 = bestp1 = bestp2 = 0;
+	intel_clock_t clock = {
+		.dot = target * 5, /* fast clock */
+	};
+	unsigned int bestppm = 1000000;
+	/* min update 19.2 MHz */
+	int max_n = min(limit->n.max, refclk / 19200);
 
 	/* based on hardware requirement, prefer smaller n to precision */
-	for (n = limit->n.min; n <= ((refclk) / minupdate); n++) {
-		updrate = refclk / n;
-		for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) {
-			for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) {
-				if (p2 > 10)
-					p2 = p2 - 1;
-				p = p1 * p2;
-				/* based on hardware requirement, prefer bigger m1,m2 values */
-				for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) {
-					m2 = (((2*(fastclk * p * n / m1 )) +
-					       refclk) / (2*refclk));
-					m = m1 * m2;
-					vco = updrate * m;
-					if (vco >= limit->vco.min && vco < limit->vco.max) {
-						ppm = 1000000 * ((vco / p) - fastclk) / fastclk;
-						absppm = (ppm > 0) ? ppm : (-ppm);
-						if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) {
-							bestppm = 0;
-							flag = 1;
-						}
-						if (absppm < bestppm - 10) {
-							bestppm = absppm;
-							flag = 1;
-						}
-						if (flag) {
-							bestn = n;
-							bestm1 = m1;
-							bestm2 = m2;
-							bestp1 = p1;
-							bestp2 = p2;
-							flag = 0;
-						}
-					}
-				}
+	for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) {
+	for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) {
+	for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) {
+		if (clock.p2 > 10)
+			clock.p2--;
+		clock.p = clock.p1 * clock.p2;
+
+		/* based on hardware requirement, prefer bigger m1,m2 values */
+		for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) {
+			unsigned int ppm, diff;
+
+			clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n,
+						     clock.m1 * refclk);
+			clock.m = clock.m1 * clock.m2;
+
+			clock.vco = refclk * clock.m / clock.n;
+
+			if (clock.vco < limit->vco.min ||
+			    clock.vco >= limit->vco.max)
+				continue;
+
+			diff = abs(clock.vco / clock.p - clock.dot);
+			ppm = div_u64(1000000ULL * diff, clock.dot);
+
+			if (ppm < 100 && clock.p > best_clock->p) {
+				bestppm = 0;
+				*best_clock = clock;
+			}
+
+			if (ppm + 10 < bestppm) {
+				bestppm = ppm;
+				*best_clock = clock;
 			}
 		}
 	}
-	best_clock->n = bestn;
-	best_clock->m1 = bestm1;
-	best_clock->m2 = bestm2;
-	best_clock->p1 = bestp1;
-	best_clock->p2 = bestp2;
+	}
+	}
 
 	return true;
 }

drm/i915: Rewrite vlv_find_best_dpll()

Commit Message

Comments

Patch