Generating arbitrary 64-bit constants on POWER can take up to 5 instructions. However, some special constants can be generated in fewer instructions. One special class of constants we don't handle, is constants that have one set of consecutive 1-bits. These can be generated with a "li rT,-1" followed by a "rldic rX,rT,SH,MB" instruction. The following patch implements this idea.
This has passed bootstrap and regtesting on powerpc64le-linux with no regressions. Testing on powerpc64-linux is still running. Ok for trunk if the BE testing comes back clean too? Peter gcc/ PR target/93176 * config/rs6000/rs6000.c (has_consecutive_ones): New function. (num_insns_constant_gpr): Use it. (rs6000_emit_set_long_const): Likewise. * config/rs6000/rs6000.md UNSPEC_RLDIC: New unspec. (rldic): New. gcc/testsuite/ PR target/93176 * gcc.target/powerpc/pr93176.c: New test. diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ca5b71ecdd3..273cab14138 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5728,6 +5728,47 @@ direct_return (void) return 0; } +/* Helper for num_insns_constant_gpr and rs6000_emit_set_long_const. + Return TRUE if VALUE contains one set of consecutive 1-bits. Also set + *SH and *MB to values needed to generate VALUE with the rldic instruction. + We accept consecutive 1-bits that wrap from MSB to LSB, ex: 0xff00...00ff. + Otherwise, return FALSE. */ + +static bool +has_consecutive_ones (unsigned HOST_WIDE_INT value, int *sh, int *mb) +{ + unsigned HOST_WIDE_INT nlz, ntz, mask; + unsigned HOST_WIDE_INT allones = -1; + + ntz = ctz_hwi (value); + nlz = clz_hwi (value); + mask = (allones >> nlz) & (allones << ntz); + if (value == mask) + { + /* Compute beginning and ending bit numbers, using IBM bit numbering. */ + *mb = nlz; + *sh = ntz; + return true; + } + + /* Check if the inverted value contains consecutive ones. We can create + that constant by basically swapping the MB and ME bit numbers. */ + value = ~value; + ntz = ctz_hwi (value); + nlz = clz_hwi (value); + mask = (allones >> nlz) & (allones << ntz); + if (value == mask) + { + /* Compute beginning and ending bit numbers, using IBM bit numbering. */ + *mb = GET_MODE_BITSIZE (DImode) - ntz; + *sh = GET_MODE_BITSIZE (DImode) - nlz; + return true; + } + + *sh = *mb = 0; + return false; +} + /* Helper for num_insns_constant. Calculate number of instructions to load VALUE to a single gpr using combinations of addi, addis, ori, oris and sldi instructions. */ @@ -5752,10 +5793,14 @@ num_insns_constant_gpr (HOST_WIDE_INT value) { HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000; HOST_WIDE_INT high = value >> 31; + int sh, mb; if (high == 0 || high == -1) return 2; + if (has_consecutive_ones (value, &sh, &mb)) + return 2; + high >>= 1; if (low == 0) @@ -9427,7 +9472,8 @@ static void rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) { rtx temp; - HOST_WIDE_INT ud1, ud2, ud3, ud4; + HOST_WIDE_INT ud1, ud2, ud3, ud4, value = c; + int sh, mb; ud1 = c & 0xffff; c = c >> 16; @@ -9453,6 +9499,12 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) gen_rtx_IOR (DImode, copy_rtx (temp), GEN_INT (ud1))); } + else if (has_consecutive_ones (value, &sh, &mb)) + { + temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (copy_rtx (temp), CONSTM1_RTX (DImode))); + emit_insn (gen_rldic (dest, copy_rtx (temp), GEN_INT (sh), GEN_INT (mb))); + } else if (ud3 == 0 && ud4 == 0) { temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 43b620ae1c0..feb5884505c 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -154,6 +154,7 @@ UNSPEC_CNTTZDM UNSPEC_PDEPD UNSPEC_PEXTD + UNSPEC_RLDIC ]) ;; @@ -9173,6 +9174,14 @@ DONE; }) +(define_insn "rldic" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec:DI [(match_operand:DI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "u6bit_cint_operand" "n") + (match_operand:DI 3 "u6bit_cint_operand" "n")] + UNSPEC_RLDIC))] + "TARGET_POWERPC64" + "rldic %0,%1,%2,%3") ;; TImode/PTImode is similar, except that we usually want to compute the ;; address into a register and use lsi/stsi (the exception is during reload). diff --git a/gcc/testsuite/gcc.target/powerpc/pr93176.c b/gcc/testsuite/gcc.target/powerpc/pr93176.c new file mode 100644 index 00000000000..d4d93f8f1b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr93176.c @@ -0,0 +1,49 @@ +/* PR target/93176 */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2" } */ + +/* Verify we generate the correct 2 instruction sequence: + li rT,-1; rldic rX,rT,SH,MB for the constants below. */ + +unsigned long +test0 (void) +{ + return 0x00ffffffffffff00UL; +} + +unsigned long +test1 (void) +{ + return 0x00ffffffff000000UL; +} + +unsigned long +test2 (void) +{ + return 0x00ffff0000000000UL; +} + +unsigned long +test3 (void) +{ + return 0xffffff0000000000UL; +} + +unsigned long +test4 (void) +{ + return 0xffffff000000ffffUL; +} + +unsigned long +test5 (void) +{ + return 0x0000010000000000UL; +} + +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,8,8" } } */ +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,24,8" } } */ +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,8" } } */ +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,48" } } */ +/* { dg-final { scan-assembler "rldic r?\[0-9\]+,r?\[0-9\]+,40,23" } } */