Message ID | 20220405195558.66144-6-lucas.araujo@eldorado.org.br (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | VDIV/VMOD Implementation | expand |
On 4/5/22 12:55, Lucas Mateus Castro(alqotel) wrote: > From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br> > > Based on already existing QEMU implementation, created an unsigned 256 > bit by 128 bit division needed to implement the vector divide extended > unsigned instruction from PowerISA3.1 > > Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br> > --- > include/qemu/host-utils.h | 15 +++++ > include/qemu/int128.h | 20 ++++++ > util/host-utils.c | 128 ++++++++++++++++++++++++++++++++++++++ > 3 files changed, 163 insertions(+) > > diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h > index ca979dc6cc..6da6a93f69 100644 > --- a/include/qemu/host-utils.h > +++ b/include/qemu/host-utils.h > @@ -32,6 +32,7 @@ > > #include "qemu/compiler.h" > #include "qemu/bswap.h" > +#include "qemu/int128.h" > > #ifdef CONFIG_INT128 > static inline void mulu64(uint64_t *plow, uint64_t *phigh, > @@ -153,6 +154,19 @@ static inline int clo64(uint64_t val) > return clz64(~val); > } > > +/* > + * clz128 - count leading zeros in a 128-bit value. > + * @val: The value to search > + */ > +static inline int clz128(Int128 a) > +{ > + if (int128_gethi(a)) { > + return clz64(int128_gethi(a)); > + } else { > + return clz64(int128_getlo(a)) + 64; > + } > +} Should be in int128.h, like bswap128. > diff --git a/util/host-utils.c b/util/host-utils.c > index bcc772b8ec..c6a01638c7 100644 > --- a/util/host-utils.c > +++ b/util/host-utils.c > @@ -266,3 +266,131 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow) > *plow = *plow << shift; > } > } > +/* Watch your spacing. > + * Unsigned 256-by-128 division. > + * Returns the remainder via r. > + * Returns lower 128 bit of quotient. > + * Needs a normalized divisor (most significant bit set to 1). > + * > + * Adapted from include/qemu/host-utils.h udiv_qrnnd, > + * from the GNU Multi Precision Library - longlong.h __udiv_qrnnd > + * (https://gmplib.org/repo/gmp/file/tip/longlong.h) > + * > + * Licensed under the GPLv2/LGPLv3 > + */ > +static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d) > +{ > + Int128 d0, d1, q0, q1, r1, r0, m; > + uint64_t mp0, mp1; > + > + d0 = int128_make64(int128_getlo(d)); > + d1 = int128_make64(int128_gethi(d)); > + > + r1 = int128_remu(n1, d1); > + q1 = int128_divu(n1, d1); > + mp0 = int128_getlo(q1); > + mp1 = int128_gethi(q1); > + mulu128(&mp0, &mp1, int128_getlo(d0)); > + m = int128_make128(mp0, mp1); > + r1 = int128_make128(int128_gethi(n0), int128_getlo(r1)); > + if (int128_ult(r1, m)) { > + q1 = int128_sub(q1, int128_one()); > + r1 = int128_add(r1, d); > + if (int128_uge(r1, d)) { > + if (int128_ult(r1, m)) { > + q1 = int128_sub(q1, int128_one()); > + r1 = int128_add(r1, d); > + } > + } > + } > + r1 = int128_sub(r1, m); > + > + r0 = int128_remu(r1, d1); > + q0 = int128_divu(r1, d1); > + mp0 = int128_getlo(q0); > + mp1 = int128_gethi(q0); > + mulu128(&mp0, &mp1, int128_getlo(d0)); > + m = int128_make128(mp0, mp1); > + r0 = int128_make128(int128_getlo(n0), int128_getlo(r0)); > + if (int128_ult(r0, m)) { > + q0 = int128_sub(q0, int128_one()); > + r0 = int128_add(r0, d); > + if (int128_uge(r0, d)) { > + if (int128_ult(r0, m)) { > + q0 = int128_sub(q0, int128_one()); > + r0 = int128_add(r0, d); > + } > + } > + } > + r0 = int128_sub(r0, m); > + > + *r = r0; > + return int128_or(int128_lshift(q1, 64), q0); > +} > + > +/* > + * Unsigned 256-by-128 division. > + * Returns the remainder. > + * Returns quotient via plow and phigh. > + * Also returns the remainder via the function return value. > + */ > +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor) > +{ > + Int128 dhi = *phigh; > + Int128 dlo = *plow; > + Int128 rem, dhighest; > + int sh; > + > + if (!int128_nz(divisor) || !int128_nz(dhi)) { > + *plow = int128_divu(dlo, divisor); > + *phigh = int128_zero(); > + return int128_remu(dlo, divisor); > + } else { > + sh = clz128(divisor); > + > + if (int128_ult(dhi, divisor)) { > + if (sh != 0) { > + /* normalize the divisor, shifting the dividend accordingly */ > + divisor = int128_lshift(divisor, sh); > + dhi = int128_or(int128_lshift(dhi, sh), > + int128_urshift(dlo, (128 - sh))); > + dlo = int128_lshift(dlo, sh); > + } > + > + *phigh = int128_zero(); > + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor); > + } else { > + if (sh != 0) { > + /* normalize the divisor, shifting the dividend accordingly */ > + divisor = int128_lshift(divisor, sh); > + dhighest = int128_rshift(dhi, (128 - sh)); > + dhi = int128_or(int128_lshift(dhi, sh), > + int128_urshift(dlo, (128 - sh))); > + dlo = int128_lshift(dlo, sh); > + > + *phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor); > + } else { > + /* > + * dhi >= divisor > + * Since the MSB of divisor is set (sh == 0), > + * (dhi - divisor) < divisor > + * > + * Thus, the high part of the quotient is 1, and we can > + * calculate the low part with a single call to udiv_qrnnd > + * after subtracting divisor from dhi > + */ > + dhi = int128_sub(dhi, divisor); > + *phigh = int128_one(); > + } > + > + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor); > + } > + > + /* > + * since the dividend/divisor might have been normalized, > + * the remainder might also have to be shifted back > + */ > + rem = int128_urshift(rem, sh); > + return rem; > + } > +} I guess this works. I'm starting to wonder if we shouldn't use libgmp, instead of rolling our own. In this case, mpn_tdiv_qr. Anyway, modulo placement of clz128, Reviewed-by: Richard Henderson <richard.henderson@linaro.org> r~
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index ca979dc6cc..6da6a93f69 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -32,6 +32,7 @@ #include "qemu/compiler.h" #include "qemu/bswap.h" +#include "qemu/int128.h" #ifdef CONFIG_INT128 static inline void mulu64(uint64_t *plow, uint64_t *phigh, @@ -153,6 +154,19 @@ static inline int clo64(uint64_t val) return clz64(~val); } +/* + * clz128 - count leading zeros in a 128-bit value. + * @val: The value to search + */ +static inline int clz128(Int128 a) +{ + if (int128_gethi(a)) { + return clz64(int128_gethi(a)); + } else { + return clz64(int128_getlo(a)) + 64; + } +} + /** * ctz32 - count trailing zeros in a 32-bit value. * @val: The value to search @@ -849,4 +863,5 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, #endif } +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor); #endif diff --git a/include/qemu/int128.h b/include/qemu/int128.h index 3af01f38cd..2a9ee956aa 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -128,11 +128,21 @@ static inline bool int128_ge(Int128 a, Int128 b) return a >= b; } +static inline bool int128_uge(Int128 a, Int128 b) +{ + return ((__uint128_t)a) >= ((__uint128_t)b); +} + static inline bool int128_lt(Int128 a, Int128 b) { return a < b; } +static inline bool int128_ult(Int128 a, Int128 b) +{ + return (__uint128_t)a < (__uint128_t)b; +} + static inline bool int128_le(Int128 a, Int128 b) { return a <= b; @@ -373,11 +383,21 @@ static inline bool int128_ge(Int128 a, Int128 b) return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo); } +static inline bool int128_uge(Int128 a, Int128 b) +{ + return (uint64_t)a.hi > (uint64_t)b.hi || (a.hi == b.hi && a.lo >= b.lo); +} + static inline bool int128_lt(Int128 a, Int128 b) { return !int128_ge(a, b); } +static inline bool int128_ult(Int128 a, Int128 b) +{ + return !int128_uge(a, b); +} + static inline bool int128_le(Int128 a, Int128 b) { return int128_ge(b, a); diff --git a/util/host-utils.c b/util/host-utils.c index bcc772b8ec..c6a01638c7 100644 --- a/util/host-utils.c +++ b/util/host-utils.c @@ -266,3 +266,131 @@ void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow) *plow = *plow << shift; } } +/* + * Unsigned 256-by-128 division. + * Returns the remainder via r. + * Returns lower 128 bit of quotient. + * Needs a normalized divisor (most significant bit set to 1). + * + * Adapted from include/qemu/host-utils.h udiv_qrnnd, + * from the GNU Multi Precision Library - longlong.h __udiv_qrnnd + * (https://gmplib.org/repo/gmp/file/tip/longlong.h) + * + * Licensed under the GPLv2/LGPLv3 + */ +static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d) +{ + Int128 d0, d1, q0, q1, r1, r0, m; + uint64_t mp0, mp1; + + d0 = int128_make64(int128_getlo(d)); + d1 = int128_make64(int128_gethi(d)); + + r1 = int128_remu(n1, d1); + q1 = int128_divu(n1, d1); + mp0 = int128_getlo(q1); + mp1 = int128_gethi(q1); + mulu128(&mp0, &mp1, int128_getlo(d0)); + m = int128_make128(mp0, mp1); + r1 = int128_make128(int128_gethi(n0), int128_getlo(r1)); + if (int128_ult(r1, m)) { + q1 = int128_sub(q1, int128_one()); + r1 = int128_add(r1, d); + if (int128_uge(r1, d)) { + if (int128_ult(r1, m)) { + q1 = int128_sub(q1, int128_one()); + r1 = int128_add(r1, d); + } + } + } + r1 = int128_sub(r1, m); + + r0 = int128_remu(r1, d1); + q0 = int128_divu(r1, d1); + mp0 = int128_getlo(q0); + mp1 = int128_gethi(q0); + mulu128(&mp0, &mp1, int128_getlo(d0)); + m = int128_make128(mp0, mp1); + r0 = int128_make128(int128_getlo(n0), int128_getlo(r0)); + if (int128_ult(r0, m)) { + q0 = int128_sub(q0, int128_one()); + r0 = int128_add(r0, d); + if (int128_uge(r0, d)) { + if (int128_ult(r0, m)) { + q0 = int128_sub(q0, int128_one()); + r0 = int128_add(r0, d); + } + } + } + r0 = int128_sub(r0, m); + + *r = r0; + return int128_or(int128_lshift(q1, 64), q0); +} + +/* + * Unsigned 256-by-128 division. + * Returns the remainder. + * Returns quotient via plow and phigh. + * Also returns the remainder via the function return value. + */ +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor) +{ + Int128 dhi = *phigh; + Int128 dlo = *plow; + Int128 rem, dhighest; + int sh; + + if (!int128_nz(divisor) || !int128_nz(dhi)) { + *plow = int128_divu(dlo, divisor); + *phigh = int128_zero(); + return int128_remu(dlo, divisor); + } else { + sh = clz128(divisor); + + if (int128_ult(dhi, divisor)) { + if (sh != 0) { + /* normalize the divisor, shifting the dividend accordingly */ + divisor = int128_lshift(divisor, sh); + dhi = int128_or(int128_lshift(dhi, sh), + int128_urshift(dlo, (128 - sh))); + dlo = int128_lshift(dlo, sh); + } + + *phigh = int128_zero(); + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor); + } else { + if (sh != 0) { + /* normalize the divisor, shifting the dividend accordingly */ + divisor = int128_lshift(divisor, sh); + dhighest = int128_rshift(dhi, (128 - sh)); + dhi = int128_or(int128_lshift(dhi, sh), + int128_urshift(dlo, (128 - sh))); + dlo = int128_lshift(dlo, sh); + + *phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor); + } else { + /* + * dhi >= divisor + * Since the MSB of divisor is set (sh == 0), + * (dhi - divisor) < divisor + * + * Thus, the high part of the quotient is 1, and we can + * calculate the low part with a single call to udiv_qrnnd + * after subtracting divisor from dhi + */ + dhi = int128_sub(dhi, divisor); + *phigh = int128_one(); + } + + *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor); + } + + /* + * since the dividend/divisor might have been normalized, + * the remainder might also have to be shifted back + */ + rem = int128_urshift(rem, sh); + return rem; + } +}