Message ID | 20240404151100.24063-1-zack@buhman.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | sh4: mac.l: implement saturation arithmetic logic | expand |
On Thu, 4 Apr 2024 at 16:12, Zack Buhman <zack@buhman.org> wrote: > > The saturation arithmetic logic in helper_macl is not correct. > > I tested and verified this behavior on a SH7091, the general pattern > is a code sequence such as: > > sets > > mov.l _mach,r2 > lds r2,mach > mov.l _macl,r2 > lds r2,macl > > mova _n,r0 > mov r0,r1 > mova _m,r0 > mac.l @r0+,@r1+ > > _mach: .long 0x00007fff > _macl: .long 0x12345678 > _m: .long 0x7fffffff > _n: .long 0x7fffffff > > Test case 0: (no int64_t overflow) > given; prior to saturation mac.l: > mach = 0x00007fff macl = 0x12345678 > @r0 = 0x7fffffff @r1 = 0x7fffffff > > expected saturation mac.l result: > mach = 0x00007fff macl = 0xffffffff > > qemu saturation mac.l result (prior to this commit): > mach = 0x00007ffe macl = 0x12345678 > > Test case 1: (no int64_t overflow) > given; prior to saturation mac.l: > mach = 0xffff8000 macl = 0x00000000 > @r0 = 0xffffffff @r1 = 0x00000001 > > expected saturation mac.l result: > mach = 0xffff8000 macl = 0x00000000 > > qemu saturation mac.l result (prior to this commit): > mach = 0xffff7fff macl = 0xffffffff > > Test case 2: (int64_t addition overflow) > given; prior to saturation mac.l: > mach = 0x80000000 macl = 0x00000000 > @r0 = 0xffffffff @r1 = 0x00000001 > > expected saturation mac.l result: > mach = 0xffff8000 macl = 0x00000000 > > qemu saturation mac.l result (prior to this commit): > mach = 0xffff7fff macl = 0xffffffff > > Test case 3: (int64_t addition overflow) > given; prior to saturation mac.l: > mach = 0x7fffffff macl = 0x00000000 > @r0 = 0x7fffffff @r1 = 0x7fffffff > > expected saturation mac.l result: > mach = 0x00007fff macl = 0xffffffff > > qemu saturation mac.l result (prior to this commit): > mach = 0xfffffffe macl = 0x00000001 > > All of the above also matches the description of MAC.L as documented > in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf > --- > target/sh4/op_helper.c | 45 ++++++++++++++++++++++++++++++++---------- > 1 file changed, 35 insertions(+), 10 deletions(-) > > diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c > index 4559d0d376..a3eb2f5281 100644 > --- a/target/sh4/op_helper.c > +++ b/target/sh4/op_helper.c > @@ -160,18 +160,43 @@ void helper_ocbi(CPUSH4State *env, uint32_t address) > > void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1) > { > - int64_t res; > - > - res = ((uint64_t) env->mach << 32) | env->macl; > - res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1; > - env->mach = (res >> 32) & 0xffffffff; > - env->macl = res & 0xffffffff; > + int32_t value0 = (int32_t)arg0; > + int32_t value1 = (int32_t)arg1; > + int64_t mul = ((int64_t)value0) * ((int64_t)value1); > + int64_t mac = (((uint64_t)env->mach) << 32) | env->macl; > + int64_t result = mac + mul; > + /* Perform 48-bit saturation arithmetic if the S flag is set */ > if (env->sr & (1u << SR_S)) { > - if (res < 0) > - env->mach |= 0xffff0000; > - else > - env->mach &= 0x00007fff; > + /* > + * The following xor/and expression is necessary to detect an > + * overflow in MSB of res; this is logic necessary because the > + * sign bit of `mac + mul` may overflow. The MAC unit on real > + * SH-4 hardware has carry/saturation logic that is equivalent > + * to the following: > + */ > + const int64_t upper_bound = ((1ull << 47) - 1); > + const int64_t lower_bound = -((1ull << 47) - 0); > + > + if (((((result ^ mac) & (result ^ mul)) >> 63) & 1) == 1) { > + /* An overflow occured during 64-bit addition */ This is testing whether the "int64_t result = mac + mul" signed 64-bit arithmetic overflowed, right? That's probably cleaner written by using the sadd64_overflow() function in host-utils.h, which does the 64-bit add and returns a bool to tell you whether it overflowed or not: if (sadd64_overflow(mac, mul, &result)) { result = (result < 0) ? lower_bound : upper_bound; } else { result = MIN(MAX(result, lower_bound), upper_bound); } > + if (((mac >> 63) & 1) == 0) { > + result = upper_bound; > + } else { > + result = lower_bound; > + } > + } else { > + /* An overflow did not occur during 64-bit addition */ > + if (result > upper_bound) { > + result = upper_bound; > + } else if (result < lower_bound) { > + result = lower_bound; > + } else { > + /* leave result unchanged */ > + } > + } > } > + env->macl = result; > + env->mach = result >> 32; thanks -- PMM
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index 4559d0d376..a3eb2f5281 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -160,18 +160,43 @@ void helper_ocbi(CPUSH4State *env, uint32_t address) void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1) { - int64_t res; - - res = ((uint64_t) env->mach << 32) | env->macl; - res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1; - env->mach = (res >> 32) & 0xffffffff; - env->macl = res & 0xffffffff; + int32_t value0 = (int32_t)arg0; + int32_t value1 = (int32_t)arg1; + int64_t mul = ((int64_t)value0) * ((int64_t)value1); + int64_t mac = (((uint64_t)env->mach) << 32) | env->macl; + int64_t result = mac + mul; + /* Perform 48-bit saturation arithmetic if the S flag is set */ if (env->sr & (1u << SR_S)) { - if (res < 0) - env->mach |= 0xffff0000; - else - env->mach &= 0x00007fff; + /* + * The following xor/and expression is necessary to detect an + * overflow in MSB of res; this is logic necessary because the + * sign bit of `mac + mul` may overflow. The MAC unit on real + * SH-4 hardware has carry/saturation logic that is equivalent + * to the following: + */ + const int64_t upper_bound = ((1ull << 47) - 1); + const int64_t lower_bound = -((1ull << 47) - 0); + + if (((((result ^ mac) & (result ^ mul)) >> 63) & 1) == 1) { + /* An overflow occured during 64-bit addition */ + if (((mac >> 63) & 1) == 0) { + result = upper_bound; + } else { + result = lower_bound; + } + } else { + /* An overflow did not occur during 64-bit addition */ + if (result > upper_bound) { + result = upper_bound; + } else if (result < lower_bound) { + result = lower_bound; + } else { + /* leave result unchanged */ + } + } } + env->macl = result; + env->mach = result >> 32; } void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)