On 8/8/24 9:12 PM, pan2...@intel.com wrote:
From: Pan Li <pan2...@intel.com>

For QI/HImode of .SAT_ADD,  the operands may be sign-extended and the
high bits of Xmode may be all 1 which is not expected.  For example as
below code.

signed char b[1];
unsigned short c;
signed char *d = b;
int main() {
   b[0] = -40;
   c = ({ (unsigned short)d[0] < 0xFFF6 ? (unsigned short)d[0] : 0xFFF6; }) + 9;
   __builtin_printf("%d\n", c);
}

After expanding we have:

;; _6 = .SAT_ADD (_3, 9);
(insn 8 7 9 (set (reg:DI 143)
         (high:DI (symbol_ref:DI ("d") [flags 0x86]  <var_decl d>)))
      (nil))
(insn 9 8 10 (set (reg/f:DI 142)
         (mem/f/c:DI (lo_sum:DI (reg:DI 143)
                 (symbol_ref:DI ("d") [flags 0x86]  <var_decl d>)) [1 d+0 S8 
A64]))
      (nil))
(insn 10 9 11 (set (reg:HI 144 [ _3 ])
         (sign_extend:HI (mem:QI (reg/f:DI 142) [0 *d.0_1+0 S1 A8]))) 
"test.c":7:10 -1
      (nil))

The convert from signed char to unsigned short will have sign_extend rtl
as above.  And finally become the lb insn as below:

lb      a1,0(a5)   // a1 is -40, aka 0xffffffffffffffd8
lui     a0,0x1a
addi    a5,a1,9
slli    a5,a5,0x30
srli    a5,a5,0x30 // a5 is 65505
sltu    a1,a5,a1   // compare 65505 and 0xffffffffffffffd8 => TRUE

The sltu try to compare 65505 and 0xffffffffffffffd8 here,  but we
actually want to compare 65505 and 65496 (0xffd8).  Thus we need to
clean up the high bits to ensure this.

The below test suites are passed for this patch:
* The rv64gcv fully regression test.

        PR target/116278

gcc/ChangeLog:

        * config/riscv/riscv.cc (riscv_cleanup_rtx_high): Add new func
        impl to cleanup high bits of rtx.
        (riscv_expand_usadd): Leverage above func to cleanup operands
        and sum.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/sat_u_add-1.c: Adjust asm check.
        * gcc.target/riscv/sat_u_add-10.c: Ditto.
        * gcc.target/riscv/sat_u_add-13.c: Ditto.
        * gcc.target/riscv/sat_u_add-14.c: Ditto.
        * gcc.target/riscv/sat_u_add-17.c: Ditto.
        * gcc.target/riscv/sat_u_add-18.c: Ditto.
        * gcc.target/riscv/sat_u_add-2.c: Ditto.
        * gcc.target/riscv/sat_u_add-21.c: Ditto.
        * gcc.target/riscv/sat_u_add-22.c: Ditto.
        * gcc.target/riscv/sat_u_add-5.c: Ditto.
        * gcc.target/riscv/sat_u_add-6.c: Ditto.
        * gcc.target/riscv/sat_u_add-9.c: Ditto.
        * gcc.target/riscv/sat_u_add_imm-1.c: Ditto.
        * gcc.target/riscv/sat_u_add_imm-10.c: Ditto.
        * gcc.target/riscv/sat_u_add_imm-13.c: Ditto.
        * gcc.target/riscv/sat_u_add_imm-14.c: Ditto.
        * gcc.target/riscv/sat_u_add_imm-2.c: Ditto.
        * gcc.target/riscv/sat_u_add_imm-5.c: Ditto.
        * gcc.target/riscv/sat_u_add_imm-6.c: Ditto.
        * gcc.target/riscv/sat_u_add_imm-9.c: Ditto.
        * gcc.target/riscv/pr116278-run-1.c: New test.

Signed-off-by: Pan Li <pan2...@intel.com>
---
  gcc/config/riscv/riscv.cc                     | 30 ++++++++++++++-----
  .../gcc.target/riscv/pr116278-run-1.c         | 16 ++++++++++
  gcc/testsuite/gcc.target/riscv/sat_u_add-1.c  |  1 +
  gcc/testsuite/gcc.target/riscv/sat_u_add-10.c |  2 ++
  gcc/testsuite/gcc.target/riscv/sat_u_add-13.c |  1 +
  gcc/testsuite/gcc.target/riscv/sat_u_add-14.c |  2 ++
  gcc/testsuite/gcc.target/riscv/sat_u_add-17.c |  1 +
  gcc/testsuite/gcc.target/riscv/sat_u_add-18.c |  2 ++
  gcc/testsuite/gcc.target/riscv/sat_u_add-2.c  |  2 ++
  gcc/testsuite/gcc.target/riscv/sat_u_add-21.c |  1 +
  gcc/testsuite/gcc.target/riscv/sat_u_add-22.c |  2 ++
  gcc/testsuite/gcc.target/riscv/sat_u_add-5.c  |  1 +
  gcc/testsuite/gcc.target/riscv/sat_u_add-6.c  |  2 ++
  gcc/testsuite/gcc.target/riscv/sat_u_add-9.c  |  1 +
  .../gcc.target/riscv/sat_u_add_imm-1.c        |  1 +
  .../gcc.target/riscv/sat_u_add_imm-10.c       |  2 ++
  .../gcc.target/riscv/sat_u_add_imm-13.c       |  1 +
  .../gcc.target/riscv/sat_u_add_imm-14.c       |  2 ++
  .../gcc.target/riscv/sat_u_add_imm-2.c        |  2 ++
  .../gcc.target/riscv/sat_u_add_imm-5.c        |  1 +
  .../gcc.target/riscv/sat_u_add_imm-6.c        |  2 ++
  .../gcc.target/riscv/sat_u_add_imm-9.c        |  1 +
  22 files changed, 68 insertions(+), 8 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/riscv/pr116278-run-1.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 5fe4273beb7..fb916217e5e 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -11564,6 +11564,24 @@ riscv_get_raw_result_mode (int regno)
    return default_get_reg_raw_mode (regno);
  }
+/* Cleanup the high bits of the RTX x and reserve the low bits.
+   The reserved bitsize comes from the bitsize of reserved_mode.  */
+
+static void
+riscv_cleanup_rtx_high (rtx x, machine_mode reserved_mode)
+{
+  machine_mode mode = GET_MODE (x);
+  int reserved_bitsize = GET_MODE_BITSIZE (reserved_mode).to_constant ();
+  int mode_bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+
+  gcc_assert (mode_bitsize >= reserved_bitsize);
+
+  int shift_bitsize = mode_bitsize - reserved_bitsize;
+
+  riscv_emit_binary (ASHIFT, x, x, GEN_INT (shift_bitsize));
+  riscv_emit_binary (LSHIFTRT, x, x, GEN_INT (shift_bitsize));
+}
Isn't this just zero extension from a narrower mode to a wider mode? Why not just use zero_extend? That will take advantage of existing expansion code to select an efficient extension approach at initial RTL generation rather than waiting for combine to clean things up.

jeff

Reply via email to