From: Raphael Zinsly <rzin...@ventanamicro.com>

Improve the handling in riscv_move_integer of constants whose upper
and lower 32-bit halves are the same when Zbkb is not available.
riscv_split_integer already handles this, but the changes in
riscv_build_integer make it possible to improve code generation for
negative values.

e.g. for:

unsigned long f (void) { return 0xf857f2def857f2deUL; }

Without the patch:

li      a0,-128454656
addi    a0,a0,734
li      a5,-128454656
addi    a5,a5,735
slli    a5,a5,32
add     a0,a5,a0

With the patch:

li      a0,128454656
addi    a0,a0,-735
slli    a5,a0,32
add     a0,a0,a5
xori    a0,a0,-1
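
The sequence above relies on a simple identity: when both 32-bit
halves of a constant are equal, the value can be rebuilt from its low
half with a shift by 32 followed by an add/or.  The new path only
triggers when bit 31 of the low half is clear, since the 32-bit
constant is synthesized sign-extended; a "negative" constant like the
one above is instead built by duplicating the halves of its
complement and finishing with an xori of -1, as shown.  A minimal
standalone sketch of that identity (illustration only; dup_low_half
is a made-up helper, not code from the patch):

#include <stdint.h>
#include <assert.h>

/* Rebuild a 64-bit value whose two 32-bit halves are identical from
   its low half alone: (lo << 32) + lo == value.  The two operands
   share no bits after the shift, so PLUS and IOR are equivalent.  */
static uint64_t
dup_low_half (uint64_t value)
{
  uint64_t lo = value & 0xffffffffULL;
  return (lo << 32) + lo;
}

int
main (void)
{
  /* Bit 31 clear: duplicate the low half directly.  */
  assert (dup_low_half (0x7857f2de7857f2deULL) == 0x7857f2de7857f2deULL);

  /* Bit 31 set: duplicate the low half of the complement, then
     invert, matching the final xori a0,a0,-1 in the example.  */
  uint64_t v = 0xf857f2def857f2deULL;
  assert (~dup_low_half (~v) == v);
  return 0;
}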

gcc/ChangeLog:
        * config/riscv/riscv.cc (riscv_build_integer): Detect constants
        with the same 32-bit halves and without Zbkb.
        (riscv_move_integer): Add synthesis of these constants.

gcc/testsuite/ChangeLog:
        * gcc.target/riscv/synthesis-11.c: New test.

        Co-authored-by: Jeff Law <j...@ventanamicro.com>
---
 gcc/config/riscv/riscv.cc                     | 59 +++++++++++++++++--
 gcc/testsuite/gcc.target/riscv/synthesis-11.c | 40 +++++++++++++
 2 files changed, 93 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/synthesis-11.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8ece7859945..454220d8ba4 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1223,6 +1223,43 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
        }
 
     }
+  else if (cost > 3 && TARGET_64BIT && can_create_pseudo_p ())
+    {
+      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+      int alt_cost;
+
+      unsigned HOST_WIDE_INT loval = value & 0xffffffff;
+      unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
+      bool bit31 = (hival & 0x80000000) != 0;
+      /* Without pack we can generate it with a shift by 32 followed by an or.  */
+      if (hival == loval && !bit31)
+       {
+         alt_cost = 2 + riscv_build_integer_1 (alt_codes,
+                                               sext_hwi (loval, 32), mode);
+         if (alt_cost < cost)
+           {
+             /* We need to save the first constant we build.  */
+             alt_codes[alt_cost - 3].save_temporary = true;
+
+             /* Now we want to shift the previously generated constant into the
+                high half.  */
+             alt_codes[alt_cost - 2].code = ASHIFT;
+             alt_codes[alt_cost - 2].value = 32;
+             alt_codes[alt_cost - 2].use_uw = false;
+             alt_codes[alt_cost - 2].save_temporary = false;
+
+             /* And the final step, IOR the two halves together.  Since this uses
+                the saved temporary, use CONCAT similar to what we do for Zbkb.  */
+             alt_codes[alt_cost - 1].code = CONCAT;
+             alt_codes[alt_cost - 1].value = 0;
+             alt_codes[alt_cost - 1].use_uw = false;
+             alt_codes[alt_cost - 1].save_temporary = false;
+
+             memcpy (codes, alt_codes, sizeof (alt_codes));
+             cost = alt_cost;
+           }
+       }
+    }
 
   return cost;
 }
@@ -2786,12 +2823,22 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
            }
          else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE)
            {
-             rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
-             rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
-             gcc_assert (t2);
-             t2 = gen_lowpart (SImode, t2);
-             emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
-             x = t;
+             if (codes[i].code == CONCAT && !TARGET_ZBKB)
+               {
+                 /* The two values should have no bits in common, so we can
+                    use PLUS instead of IOR which has a higher chance of
+                    using a compressed instruction.  */
+                 x = gen_rtx_PLUS (mode, x, old_value);
+               }
+             else
+               {
+                 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
+                 rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
+                 gcc_assert (t2);
+                 t2 = gen_lowpart (SImode, t2);
+                 emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
+                 x = t;
+               }
            }
          else
            x = gen_rtx_fmt_ee (codes[i].code, mode,
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-11.c b/gcc/testsuite/gcc.target/riscv/synthesis-11.c
new file mode 100644
index 00000000000..98401d5ca32
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-11.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 114 } } */
+
+
+
+unsigned long foo_0x7857f2de7857f2de(void) { return 0x7857f2de7857f2deUL; }
+unsigned long foo_0x19660e6319660e63(void) { return 0x19660e6319660e63UL; }
+unsigned long foo_0x137f1b75137f1b75(void) { return 0x137f1b75137f1b75UL; }
+unsigned long foo_0x35019fa035019fa0(void) { return 0x35019fa035019fa0UL; }
+unsigned long foo_0x3828e6c13828e6c1(void) { return 0x3828e6c13828e6c1UL; }
+unsigned long foo_0x039d87e9039d87e9(void) { return 0x039d87e9039d87e9UL; }
+unsigned long foo_0x429617c1429617c1(void) { return 0x429617c1429617c1UL; }
+unsigned long foo_0x2411811924118119(void) { return 0x2411811924118119UL; }
+unsigned long foo_0x0c01df7d0c01df7d(void) { return 0x0c01df7d0c01df7dUL; }
+unsigned long foo_0x70e23d6b70e23d6b(void) { return 0x70e23d6b70e23d6bUL; }
+unsigned long foo_0xf857f2def857f2de(void) { return 0xf857f2def857f2deUL; }
+unsigned long foo_0x99660e6399660e63(void) { return 0x99660e6399660e63UL; }
+unsigned long foo_0x937f1b75937f1b75(void) { return 0x937f1b75937f1b75UL; }
+unsigned long foo_0xb5019fa0b5019fa0(void) { return 0xb5019fa0b5019fa0UL; }
+unsigned long foo_0xb828e6c1b828e6c1(void) { return 0xb828e6c1b828e6c1UL; }
+unsigned long foo_0x839d87e9839d87e9(void) { return 0x839d87e9839d87e9UL; }
+unsigned long foo_0xc29617c1c29617c1(void) { return 0xc29617c1c29617c1UL; }
+unsigned long foo_0xa4118119a4118119(void) { return 0xa4118119a4118119UL; }
+unsigned long foo_0x8c01df7d8c01df7d(void) { return 0x8c01df7d8c01df7dUL; }
+unsigned long foo_0xf0e23d6bf0e23d6b(void) { return 0xf0e23d6bf0e23d6bUL; }
+unsigned long foo_0x7fff00007fff0000(void) { return 0x7fff00007fff0000UL; }
+
-- 
2.42.0
