Hi,
This patch tries to optimize PowerPC 64 bit constant generation when
the constant can be transformed from a 32 bit or 16 bit constant by
rotating, shifting and mask AND.
The attachments are the patch diff file and change log file.
Bootstrapped and tested on powerpc64le with no regressions. Is this
okay for trunk? Any recommendations? Thanks a lot.
PR target/94395
* config/rs6000/rs6000.c (rs6000_emit_set_32bit_const,
rs6000_rotate_long_const, rs6000_peel_long_const): New functions.
(rs6000_emit_set_long_const, num_insns_constant_gpr): Call new
functions.
* testsuite/gcc.target/powerpc/pr94395.c: New test.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index f26fc13484b..bcb867ffe94 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1109,6 +1109,9 @@ static tree rs6000_handle_longcall_attribute (tree *,
tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
+static HOST_WIDE_INT rs6000_rotate_long_const (unsigned HOST_WIDE_INT, int *);
+static HOST_WIDE_INT rs6000_peel_long_const (unsigned HOST_WIDE_INT, int *,
+ int *);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
@@ -5868,12 +5871,28 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
else if (TARGET_POWERPC64)
{
+ int rotate, head, tail;
+ HOST_WIDE_INT imm1, imm2;
+ unsigned HOST_WIDE_INT uc = value;
HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
HOST_WIDE_INT high = value >> 31;
if (high == 0 || high == -1)
return 2;
+ /* A long constant can be transformed from both a 16 bit constant and
+ a 32 bit constant. So we first test imm1 and imm2 if they're 16
+ bit. */
+ imm1 = rs6000_rotate_long_const (uc, &rotate);
+ if (SIGNED_INTEGER_16BIT_P (imm1))
+ return 2;
+ imm2 = rs6000_peel_long_const (uc, &head, &tail);
+ if (SIGNED_INTEGER_16BIT_P (imm2))
+ return 2;
+ if (SIGNED_INTEGER_NBIT_P (imm1, 32)
+ || SIGNED_INTEGER_NBIT_P (imm2, 32))
+ return 3;
+
high >>= 1;
if (low == 0 || low == high)
@@ -9720,6 +9739,96 @@ rs6000_emit_set_const (rtx dest, rtx source)
return true;
}
+/* Function to load 32 a bit constant. */
+static void
+rs6000_emit_set_32bit_const (rtx dest, HOST_WIDE_INT c)
+{
+ gcc_assert (SIGNED_INTEGER_NBIT_P (c, 32));
+
+ rtx temp = can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest;
+
+ if (SIGNED_INTEGER_16BIT_P (c))
+ emit_insn (gen_rtx_SET (dest, GEN_INT (c)));
+ else
+ {
+ emit_insn (gen_rtx_SET (copy_rtx (temp),
+ GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
+ emit_insn (gen_rtx_SET (dest,
+ gen_rtx_IOR (DImode, copy_rtx (temp),
+ GEN_INT (c & 0xffff))));
+ }
+}
+
+/* Helper function of rs6000_emit_set_long_const to left rotate a long
+ constant. It returns the result immediately when it finds a 32 bit
+ constant. It at most rotates for 31 bits.
+ For instant, the constant 0x1230000000000004 can be transformed to
+ a 32 bit constant 0x0000000000004123 by left rotating 12 bits. */
+static HOST_WIDE_INT
+rs6000_rotate_long_const (unsigned HOST_WIDE_INT c, int *rot)
+{
+ int bitsize = GET_MODE_BITSIZE (DImode);
+ bool found = false;
+ unsigned HOST_WIDE_INT imm = c;
+ unsigned HOST_WIDE_INT m = imm >> (bitsize - 1);
+ int rotate = 0;
+
+ while (rotate < 31 && !found)
+ {
+ imm = imm << 1 | m;
+ if (clz_hwi (imm) > 32 || clz_hwi (~imm) > 32)
+ found = true;
+ rotate++;
+ m = imm >> (bitsize - 1);
+ }
+
+ *rot = rotate;
+ return imm;
+}
+
+/* Helper function of rs6000_emit_set_long_const to reutrn a constant by
+ removing consecutive 0s and 1s at the head and tail then setting all high
+ bits.
+ For instance, 0x000000fff2345000 can be transformed to 0xfff2345 by
+ peeling the head and tail, then to 0xffffffffffff234 by setting all
+ high bits.
+
+ 0x0 0 0 0 0 0 f f f 2 3 4 5 0 0 0
+ |_________| |___|
+ head_zero tail_zero
+
+ 0xf f f f f f f f f 2 3 4 5 f f f
+ |_______________| |___|
+ head_one tail_one
+
+ The HEAD_PAD is set to the number of consecutive 0s at the head. If there
+ is no consecutive 0s, HEAD_PAD is set to 0.
+ If the tail has consecutive 0s, TAIL_PAD is a positive number. Otherwise,
+ TAIL_PAD is set to a negative number. */
+static HOST_WIDE_INT
+rs6000_peel_long_const (unsigned HOST_WIDE_INT c, int *head_pad,
+ int *tail_pad)
+{
+ unsigned HOST_WIDE_INT allones = -1;
+ int bitsize = GET_MODE_BITSIZE (DImode);
+ int head_zero = clz_hwi (c);
+ int head_one = clz_hwi (~c);
+ int head = head_zero ? head_zero : head_one;
+ int tail_zero = ctz_hwi (c);
+ int tail_one = ctz_hwi (~c);
+ int tail = tail_zero ? tail_zero : tail_one;
+ int len;
+ unsigned HOST_WIDE_INT imm, mask;
+
+ mask = allones << tail & allones >> head;
+ len = bitsize - head - tail;
+ imm = ((c & mask) >> tail) | (allones - ((1UL << len) - 1));
+
+ *head_pad = head_zero;
+ *tail_pad = tail_zero ? tail_zero : (-tail_one);
+ return imm;
+}
+
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
Output insns to set DEST equal to the constant C as a series of
lis, ori and shl instructions. */
@@ -9729,6 +9838,67 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
{
rtx temp;
HOST_WIDE_INT ud1, ud2, ud3, ud4;
+ HOST_WIDE_INT imm, imm1, imm2;
+ unsigned HOST_WIDE_INT ui = c;
+ bool done = false;
+ int bitsize = GET_MODE_BITSIZE (DImode);
+ int rotate, head, tail;
+
+ if (SIGNED_INTEGER_NBIT_P (c, 32))
+ {
+ rs6000_emit_set_32bit_const (dest, c);
+ return;
+ }
+
+ imm1 = rs6000_rotate_long_const (ui, &rotate);
+ imm2 = rs6000_peel_long_const (ui, &head, &tail);
+
+ if (SIGNED_INTEGER_NBIT_P (imm1, 32)
+ || SIGNED_INTEGER_NBIT_P (imm2, 32))
+ {
+ imm = (SIGNED_INTEGER_16BIT_P (imm2)
+ || !SIGNED_INTEGER_NBIT_P (imm1, 32))
+ ? imm2 : imm1;
+ temp = can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest;
+
+ rs6000_emit_set_32bit_const (temp, imm);
+
+ if (imm == imm1)
+ emit_move_insn (dest,
+ gen_rtx_ROTATE (DImode, copy_rtx (temp),
+ GEN_INT (bitsize - rotate)));
+ else if (head && (tail > 0))
+ {
+ rtx one = gen_rtx_ROTATE (DImode, copy_rtx (temp),
+ GEN_INT (tail));
+ emit_move_insn (dest,
+ gen_rtx_AND (DImode, one,
+ GEN_INT ((1UL << (bitsize - head))
+ - (1UL << tail))));
+ }
+ else if (head && (tail < 0))
+ {
+ rtx one = gen_rtx_ROTATE (DImode, copy_rtx (temp),
+ GEN_INT (-tail));
+ emit_move_insn (dest,
+ gen_rtx_AND (DImode, one,
+ GEN_INT ((1UL << (bitsize - head))
+ - 1)));
+ }
+ else if (tail > 0)
+ emit_move_insn (dest,
+ gen_rtx_ASHIFT (DImode, copy_rtx (temp),
+ GEN_INT (tail)));
+ else
+ emit_move_insn (dest,
+ gen_rtx_ROTATE (DImode, copy_rtx (temp),
+ GEN_INT (-tail)));
+
+ done = true;
+ }
+
+ if (done)
+ return;
ud1 = c & 0xffff;
c = c >> 16;
diff --git a/gcc/testsuite/gcc.target/powerpc/pr94395.c
b/gcc/testsuite/gcc.target/powerpc/pr94395.c
new file mode 100644
index 00000000000..05648f4cac8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr94395.c
@@ -0,0 +1,45 @@
+/* PR target/94395 */
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+
+unsigned long
+test0 (void)
+{
+ return 0xff200000000222ffUL;
+}
+
+unsigned long
+test1 (void)
+{
+ return 0x000ffffffff92200UL;
+}
+
+unsigned long
+test2 (void)
+{
+ return 0x0001234000000000UL;
+}
+
+unsigned long
+test3 (void)
+{
+ return 0x0001234fffffffffUL;
+}
+
+unsigned long
+test4 (void)
+{
+ return 0xfff1234000000000UL;
+}
+
+unsigned long
+test5 (void)
+{
+ return 0xfff1234fffffffffUL;
+}
+
+/* { dg-final { scan-assembler-times "rotldi" 2 } } */
+/* { dg-final { scan-assembler-times "rldic " 2 } } */
+/* { dg-final { scan-assembler-times "rldicl" 1 } } */
+/* { dg-final { scan-assembler-times "sldi" 1 } } */