The saturation arithmetic logic in helper_macw is not correct.

I tested and verified this behavior on a SH7091, the general pattern
is a code sequence such as:

        sets

        mov.l _mach,r2
        lds r2,mach
        mov.l _macl,r2
        lds r2,macl

        mova _n,r0
        mov r0,r1
        mova _m,r0
        mac.w @r0+,@r1+

 _mach: .long 0xffffffff
 _macl: .long 0xfffffffe
 _m:    .word 0x0002
        .word 0
 _n:    .word 0x0003
        .word 0

test 0:
  (mach should not be modified if an overflow did not occur)

  given, prior to saturation mac.l:
    mach = 0xffffffff ; macl = 0xfffffffe
    @r0  = 0x0002     ; @r1  = 0x0003

  expected saturation mac.w result:
    mach = 0xffffffff (unchanged)
    macl = 0x00000004

  qemu saturation mac.w result (before this commit):
    mach = 0x00000001
    macl = 0x80000000

  In the context of the helper_macw implementation prior to this
  commit, initially this appears to be a surprising result. This is
  because (prior to unary negation) the C literal `0x80000000` (due to
  being outside the range of a `signed int`) is evaluated as an
  `unsigned int` whereas the literal `1` (due to being inside the
  range of `signed int`) is evaluated as `signed int`, as in:

    static_assert(1 < -0x80000000 == 1);
    static_assert(1 < -1 == 0);

  This is because the unary negation of an unsigned int is an
  unsigned int.

  In other words, if the `res < -0x80000000` comparison used
  infinite-precision literals, the saturation mac.w result would have
  been:

    mach = 0x00000000
    macl = 0x00000004

  Due to this (forgivable) misunderstanding of C literals, the
  following behavior also occurs:

test 1:
  (`2 * 3 + 0` is not an overflow)

  given, prior to saturation mac.l:
    mach = 0x00000000 ; macl = 0x00000000
    @r0  = 0x0002     ; @r1  = 0x0003

  expected saturation mac.w result:
    mach = 0x00000000 (unchanged)
    macl = 0x00000006

  qemu saturation mac.w result (before this commit):
    mach = 0x00000001
    macl = 0x80000000

test 2:
  (mach should not be accumulated in saturation mode)
  (16-bit operands are sign-extended)

  given, prior to saturation mac.l:
    mach = 0x12345678 ; macl = 0x7ffffffe
    @r0  = 0x0002     ; @r1  = 0xfffd

  expected saturation mac.w result:
    mach = 0x12345678 (unchanged)
    macl = 0x7ffffff8

  qemu saturation mac.w result (before this commit):
    mach = 0x00000001
    macl = 0x7fffffff

test 3:
  (macl should have the correct saturation value)

  given, prior to saturation mac.l:
    mach = 0xabcdef12 ; macl = 0x7ffffffa
    @r0  = 0x0002     ; @r1  = 0x0003

  expected saturation mac.w result:
    mach = 0x00000001 (overwritten)
    macl = 0x7fffffff

  qemu saturation mac.w result (before this commit):
    mach = 0x00000001
    macl = 0x80000000

All of the above also matches the description of MAC.W as documented
in cd00147165-sh-4-32-bit-cpu-core-architecture-stmicroelectronics.pdf

Signed-off-by: Zack Buhman <z...@buhman.org>
---
 target/sh4/op_helper.c | 41 +++++++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index ee16524083..b3c1e69f53 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -187,20 +187,41 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, 
uint32_t arg1)
 
 void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
-    int64_t res;
+    int16_t value0 = (int16_t)arg0;
+    int16_t value1 = (int16_t)arg1;
+    int32_t mul = ((int32_t)value0) * ((int32_t)value1);
 
-    res = ((uint64_t) env->mach << 32) | env->macl;
-    res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1;
-    env->mach = (res >> 32) & 0xffffffff;
-    env->macl = res & 0xffffffff;
+    /* Perform 32-bit saturation arithmetic if the S flag is set */
     if (env->sr & (1u << SR_S)) {
-        if (res < -0x80000000) {
-            env->mach = 1;
-            env->macl = 0x80000000;
-        } else if (res > 0x000000007fffffff) {
+        const int64_t upper_bound =  ((1ul << 31) - 1);
+        const int64_t lower_bound = -((1ul << 31) - 0);
+
+        /*
+         * In saturation arithmetic mode, the accumulator is 32-bit
+         * with carry. MACH is not considered during the addition
+         * operation nor the 32-bit saturation logic.
+         */
+        int32_t mac = env->macl;
+        int32_t result;
+        bool overflow = sadd32_overflow(mac, mul, &result);
+        if (overflow) {
+            result = (mac < 0) ? lower_bound : upper_bound;
+            /* MACH is set to 1 to denote overflow */
+            env->macl = result;
             env->mach = 1;
-            env->macl = 0x7fffffff;
+        } else {
+            result = MIN(MAX(result, lower_bound), upper_bound);
+            /* If there was no overflow, MACH is unchanged */
+            env->macl = result;
         }
+    } else {
+        /* In non-saturation arithmetic mode, the accumulator is 64-bit */
+        int64_t mac = (((uint64_t)env->mach) << 32) | env->macl;
+
+        /* The carry bit of the 64-bit addition is discarded */
+        int64_t result = mac + (int64_t)mul;
+        env->macl = result;
+        env->mach = result >> 32;
     }
 }
 
-- 
2.41.0


Reply via email to