Message ID | 1379959390-2686-1-git-send-email-ville.syrjala@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Sep 23, 2013 at 09:03:10PM +0300, ville.syrjala@linux.intel.com wrote: > From: Ville Syrjälä <ville.syrjala@linux.intel.com> > > vlv_find_best_dpll() has several integer over/underflow issues, > includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of > variables, some slightly weird math, and it doesn't look very > nice either. > > Rather than try to deal with each issue separately I just decided > to rewrite the function a bit. > > WARNING: Entirely untested > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> > --- > drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++--------------------- > 1 file changed, 40 insertions(+), 54 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 3b06250..f89fb12 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc, > int target, int refclk, intel_clock_t *match_clock, > intel_clock_t *best_clock) > { > - u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2; > - u32 m, n, fastclk; > - u32 updrate, minupdate, p; > - unsigned long bestppm, ppm, absppm; > - int dotclk, flag; > - > - flag = 0; > - dotclk = target * 1000; > - bestppm = 1000000; > - ppm = absppm = 0; > - fastclk = dotclk / (2*100); > - updrate = 0; > - minupdate = 19200; > - n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0; > - bestm1 = bestm2 = bestp1 = bestp2 = 0; > + intel_clock_t clock = { > + .dot = target * 5, /* fast clock */ > + }; > + unsigned int bestppm = 1000000; > + /* min update 19.2 MHz */ > + int max_n = min(limit->n.max, refclk / 19200); > > /* based on hardware requirement, prefer smaller n to precision */ > - for (n = limit->n.min; n <= ((refclk) / minupdate); n++) { > - updrate = refclk / n; > - for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) { > - for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) { > - if (p2 > 10) > - p2 = p2 - 1; > - p = p1 * p2; > - /* based on hardware requirement, prefer bigger m1,m2 values */ > - for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) { > - m2 = (((2*(fastclk * p * n / m1 )) + > - refclk) / (2*refclk)); > - m = m1 * m2; > - vco = updrate * m; > - if (vco >= limit->vco.min && vco < limit->vco.max) { > - ppm = 1000000 * ((vco / p) - fastclk) / fastclk; > - absppm = (ppm > 0) ? ppm : (-ppm); > - if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) { > - bestppm = 0; > - flag = 1; > - } > - if (absppm < bestppm - 10) { > - bestppm = absppm; > - flag = 1; > - } > - if (flag) { > - bestn = n; > - bestm1 = m1; > - bestm2 = m2; > - bestp1 = p1; > - bestp2 = p2; > - flag = 0; > - } > - } > - } > + for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) { > + for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) { > + for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) { I think that's going to upset the coding style police ;-) I guess it would be simple to extract a vlv_compute_clock like we have for pnv/i9xx that's both used here and in the get_clock code from Jesse. -Daniel > + if (clock.p2 > 10) > + clock.p2--; > + clock.p = clock.p1 * clock.p2; > + > + /* based on hardware requirement, prefer bigger m1,m2 values */ > + for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) { > + unsigned int ppm, diff; > + > + clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n, > + clock.m1 * refclk); > + clock.m = clock.m1 * clock.m2; > + > + clock.vco = refclk * clock.m / clock.n; > + > + if (clock.vco < limit->vco.min || > + clock.vco >= limit->vco.max) > + continue; > + > + diff = abs(clock.vco / clock.p - clock.dot); > + ppm = div_u64(1000000ULL * diff, clock.dot); > + > + if (ppm < 100 && clock.p > best_clock->p) { > + bestppm = 0; > + *best_clock = clock; > + } > + > + if (ppm + 10 < bestppm) { > + bestppm = ppm; > + *best_clock = clock; > } > } > } > - best_clock->n = bestn; > - best_clock->m1 = bestm1; > - best_clock->m2 = bestm2; > - best_clock->p1 = bestp1; > - best_clock->p2 = bestp2; > + } > + } > > return true; > } > -- > 1.8.1.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Tue, Sep 24, 2013 at 11:23:31AM +0200, Daniel Vetter wrote: > On Mon, Sep 23, 2013 at 09:03:10PM +0300, ville.syrjala@linux.intel.com wrote: > > From: Ville Syrjälä <ville.syrjala@linux.intel.com> > > > > vlv_find_best_dpll() has several integer over/underflow issues, > > includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of > > variables, some slightly weird math, and it doesn't look very > > nice either. > > > > Rather than try to deal with each issue separately I just decided > > to rewrite the function a bit. > > > > WARNING: Entirely untested > > > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> > > --- > > drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++--------------------- > > 1 file changed, 40 insertions(+), 54 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > > index 3b06250..f89fb12 100644 > > --- a/drivers/gpu/drm/i915/intel_display.c > > +++ b/drivers/gpu/drm/i915/intel_display.c > > @@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc, > > int target, int refclk, intel_clock_t *match_clock, > > intel_clock_t *best_clock) > > { > > - u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2; > > - u32 m, n, fastclk; > > - u32 updrate, minupdate, p; > > - unsigned long bestppm, ppm, absppm; > > - int dotclk, flag; > > - > > - flag = 0; > > - dotclk = target * 1000; > > - bestppm = 1000000; > > - ppm = absppm = 0; > > - fastclk = dotclk / (2*100); > > - updrate = 0; > > - minupdate = 19200; > > - n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0; > > - bestm1 = bestm2 = bestp1 = bestp2 = 0; > > + intel_clock_t clock = { > > + .dot = target * 5, /* fast clock */ > > + }; > > + unsigned int bestppm = 1000000; > > + /* min update 19.2 MHz */ > > + int max_n = min(limit->n.max, refclk / 19200); > > > > /* based on hardware requirement, prefer smaller n to precision */ > > - for (n = limit->n.min; n <= ((refclk) / minupdate); n++) { > > - updrate = refclk / n; > > - for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) { > > - for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) { > > - if (p2 > 10) > > - p2 = p2 - 1; > > - p = p1 * p2; > > - /* based on hardware requirement, prefer bigger m1,m2 values */ > > - for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) { > > - m2 = (((2*(fastclk * p * n / m1 )) + > > - refclk) / (2*refclk)); > > - m = m1 * m2; > > - vco = updrate * m; > > - if (vco >= limit->vco.min && vco < limit->vco.max) { > > - ppm = 1000000 * ((vco / p) - fastclk) / fastclk; > > - absppm = (ppm > 0) ? ppm : (-ppm); > > - if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) { > > - bestppm = 0; > > - flag = 1; > > - } > > - if (absppm < bestppm - 10) { > > - bestppm = absppm; > > - flag = 1; > > - } > > - if (flag) { > > - bestn = n; > > - bestm1 = m1; > > - bestm2 = m2; > > - bestp1 = p1; > > - bestp2 = p2; > > - flag = 0; > > - } > > - } > > - } > > + for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) { > > + for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) { > > + for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) { > > I think that's going to upset the coding style police ;-) I suppose. But I think it's a semi-decent way of avoiding deep nesting in these loops-within-loops situations. Obviosuly if there's any code other than the internal loop contained in the outer loop, I would not use it. But I'm not really attached to this approach, so I'm fine with indenting each loop if that's what people prefer. BTW now that I look at the code again, I'm wondering why we're checking 'p1 > p1.min' instead of 'p1 >= p1.min'? > I guess it would > be simple to extract a vlv_compute_clock like we have for pnv/i9xx that's > both used here and in the get_clock code from Jesse. Right. I can do that. > -Daniel > > > > + if (clock.p2 > 10) > > + clock.p2--; > > + clock.p = clock.p1 * clock.p2; > > + > > + /* based on hardware requirement, prefer bigger m1,m2 values */ > > + for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) { > > + unsigned int ppm, diff; > > + > > + clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n, > > + clock.m1 * refclk); > > + clock.m = clock.m1 * clock.m2; > > + > > + clock.vco = refclk * clock.m / clock.n; > > + > > + if (clock.vco < limit->vco.min || > > + clock.vco >= limit->vco.max) > > + continue; > > + > > + diff = abs(clock.vco / clock.p - clock.dot); > > + ppm = div_u64(1000000ULL * diff, clock.dot); > > + > > + if (ppm < 100 && clock.p > best_clock->p) { > > + bestppm = 0; > > + *best_clock = clock; > > + } > > + > > + if (ppm + 10 < bestppm) { > > + bestppm = ppm; > > + *best_clock = clock; > > } > > } > > } > > - best_clock->n = bestn; > > - best_clock->m1 = bestm1; > > - best_clock->m2 = bestm2; > > - best_clock->p1 = bestp1; > > - best_clock->p2 = bestp2; > > + } > > + } > > > > return true; > > } > > -- > > 1.8.1.5 > > > > _______________________________________________ > > Intel-gfx mailing list > > Intel-gfx@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx > > -- > Daniel Vetter > Software Engineer, Intel Corporation > +41 (0) 79 365 57 48 - http://blog.ffwll.ch
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3b06250..f89fb12 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc, int target, int refclk, intel_clock_t *match_clock, intel_clock_t *best_clock) { - u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2; - u32 m, n, fastclk; - u32 updrate, minupdate, p; - unsigned long bestppm, ppm, absppm; - int dotclk, flag; - - flag = 0; - dotclk = target * 1000; - bestppm = 1000000; - ppm = absppm = 0; - fastclk = dotclk / (2*100); - updrate = 0; - minupdate = 19200; - n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0; - bestm1 = bestm2 = bestp1 = bestp2 = 0; + intel_clock_t clock = { + .dot = target * 5, /* fast clock */ + }; + unsigned int bestppm = 1000000; + /* min update 19.2 MHz */ + int max_n = min(limit->n.max, refclk / 19200); /* based on hardware requirement, prefer smaller n to precision */ - for (n = limit->n.min; n <= ((refclk) / minupdate); n++) { - updrate = refclk / n; - for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) { - for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) { - if (p2 > 10) - p2 = p2 - 1; - p = p1 * p2; - /* based on hardware requirement, prefer bigger m1,m2 values */ - for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) { - m2 = (((2*(fastclk * p * n / m1 )) + - refclk) / (2*refclk)); - m = m1 * m2; - vco = updrate * m; - if (vco >= limit->vco.min && vco < limit->vco.max) { - ppm = 1000000 * ((vco / p) - fastclk) / fastclk; - absppm = (ppm > 0) ? ppm : (-ppm); - if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) { - bestppm = 0; - flag = 1; - } - if (absppm < bestppm - 10) { - bestppm = absppm; - flag = 1; - } - if (flag) { - bestn = n; - bestm1 = m1; - bestm2 = m2; - bestp1 = p1; - bestp2 = p2; - flag = 0; - } - } - } + for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) { + for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) { + for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) { + if (clock.p2 > 10) + clock.p2--; + clock.p = clock.p1 * clock.p2; + + /* based on hardware requirement, prefer bigger m1,m2 values */ + for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) { + unsigned int ppm, diff; + + clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n, + clock.m1 * refclk); + clock.m = clock.m1 * clock.m2; + + clock.vco = refclk * clock.m / clock.n; + + if (clock.vco < limit->vco.min || + clock.vco >= limit->vco.max) + continue; + + diff = abs(clock.vco / clock.p - clock.dot); + ppm = div_u64(1000000ULL * diff, clock.dot); + + if (ppm < 100 && clock.p > best_clock->p) { + bestppm = 0; + *best_clock = clock; + } + + if (ppm + 10 < bestppm) { + bestppm = ppm; + *best_clock = clock; } } } - best_clock->n = bestn; - best_clock->m1 = bestm1; - best_clock->m2 = bestm2; - best_clock->p1 = bestp1; - best_clock->p2 = bestp2; + } + } return true; }