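When comparing for eq/ne against a constant that has only 16 significant bits (i.e. it can be rotated so that it fits in the low 16 bits), the comparison can be done on the rotated operand against the rotated constant instead. This avoids the extra instructions otherwise needed to build the constant in a register.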
As the example in PR103743: For "in == 0x8000000000000000LL", this patch generates: rotldi %r3,%r3,16 cmpldi %cr0,%r3,32768 instead: li %r9,-1 rldicr %r9,%r9,0,0 cmpd %cr0,%r3,%r9 This patch pass bootstrap and regtest on ppc64 and ppc64le. Ok for trunk? Thanks! BR, Jiufu PR target/103743 gcc/ChangeLog: * config/rs6000/rs6000.cc (rot_to_16bits): New function. (rs6000_generate_compare): Compare rotated const for eq. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr103743.c: New test. * gcc.target/powerpc/pr103743_1.c: New test. --- gcc/config/rs6000/rs6000.cc | 62 +++++++++++++ gcc/testsuite/gcc.target/powerpc/pr103743.c | 48 ++++++++++ gcc/testsuite/gcc.target/powerpc/pr103743_1.c | 87 +++++++++++++++++++ 3 files changed, 197 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103743.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103743_1.c diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 3afe78f5d04..4aa1d0ca4ab 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -14860,6 +14860,42 @@ rs6000_reverse_condition (machine_mode mode, enum rtx_code code) return reverse_condition (code); } +/* If SRC is a constant with only 16bits, return the number by which + the constant can be rotated to lowest 16bits. + Return 0, if SRC does not meet requirements. + Set *sgn to 1 if the rotated cst is negative, otherwise set it to 0. + Set *res_cst to the rotated constant. */ + +static int +rot_to_16bits (rtx src, machine_mode mode, bool *sgn, rtx *res_cst) +{ + if (!(src && CONST_INT_P (src))) + return 0; + + unsigned HOST_WIDE_INT C = INTVAL (src); + + /* If all 0 except low 16bits. */ + int leadz = clz_hwi (C) + 16; + rtx cst = simplify_gen_binary (ROTATE, mode, src, GEN_INT (leadz)); + if (satisfies_constraint_K (cst)) + { + *res_cst = cst; + return leadz; + } + + /* If all 1 except low 15bits. 
*/ + leadz = clz_hwi (~C) + 15; + cst = simplify_gen_binary (ROTATE, mode, src, GEN_INT (leadz)); + if (satisfies_constraint_I (cst)) + { + *sgn = true; + *res_cst = cst; + return leadz; + } + + return 0; +} + /* Generate a compare for CODE. Return a brand-new rtx that represents the result of the compare. */ @@ -14889,6 +14925,32 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) else comp_mode = CCmode; + /* "i == C" ==> "rotl(i,N) == rotl(C,N)" if rotl(C,N) only low 16bits. */ + if ((code == NE || code == EQ) && mode == DImode) + { + /* The constant would already been set to a reg in the last insn. */ + rtx_insn *last = get_last_insn_anywhere (); + rtx set = last ? single_set (last) : NULL_RTX; + rtx src = (set && SET_DEST (set) == op1) ? SET_SRC (set) : NULL_RTX; + + /* It constant may be in constant pool. */ + if (src && MEM_P (src)) + src = avoid_constant_pool_reference (src); + + rtx cst = NULL_RTX; + bool sgn = false; + int n = rot_to_16bits (src, mode, &sgn, &cst); + if (n >= 16 && n <= 48) + { + rtx dest = gen_reg_rtx (mode); + emit_insn ( + gen_rtx_SET (dest, gen_rtx_ROTATE (mode, op0, GEN_INT (n)))); + op0 = dest; + op1 = cst; + comp_mode = sgn ? CCmode : CCUNSmode; + } + } + /* If we have an unsigned compare, make sure we don't have a signed value as an immediate. 
*/ if (comp_mode == CCUNSmode && CONST_INT_P (op1) diff --git a/gcc/testsuite/gcc.target/powerpc/pr103743.c b/gcc/testsuite/gcc.target/powerpc/pr103743.c new file mode 100644 index 00000000000..606e5e51c4f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103743.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "cmpldi" 8 } } */ +/* { dg-final { scan-assembler-times "cmpdi" 4 } } */ +/* { dg-final { scan-assembler-times "rotldi" 8 } } */ + +int foo (int a); + +int __attribute__ ((noinline)) udi_fun (unsigned long long in) +{ + if (in == (0x8642000000000000ULL)) + return foo (1); + if (in == (0x7642000000000000ULL)) + return foo (12); + if (in == (0x8000000000000000ULL)) + return foo (32); + if (in == (0x8642FFFFFFFFFFFFULL)) + return foo (46); + if (in == (0x7642FFFFFFFFFFFFULL)) + return foo (51); + if (in == (0x7567000000ULL)) + return foo (9); + if (in == (0xFFF8567FFFFFFFFFULL)) + return foo (19); + + return 0; +} + +int __attribute__ ((noinline)) di_fun (long long in) +{ + if (in == (0x8642000000000000LL)) + return foo (1); + if (in == (0x7642000000000000LL)) + return foo (12); + if (in == (0x8000000000000000LL)) + return foo (32); + if (in == (0x8642FFFFFFFFFFFFLL)) + return foo (46); + if (in == (0x7642FFFFFFFFFFFFLL)) + return foo (51); + if (in == (0x7567000000LL)) + return foo (9); + if (in == (0xFFF8567FFFFFFFFFLL)) + return foo (19); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr103743_1.c b/gcc/testsuite/gcc.target/powerpc/pr103743_1.c new file mode 100644 index 00000000000..56fbbe4d4f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr103743_1.c @@ -0,0 +1,87 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -std=c99" } */ + +int +foo (int a) +{ + return a + 6; +} + +int __attribute__ ((noinline)) udi_fun (unsigned long long in) +{ + if (in == (0x8642000000000000ULL)) + return foo (1); + if (in == (0x7642000000000000ULL)) + 
return foo (12); + if (in == (0x8000000000000000ULL)) + return foo (32); + if (in == (0x8642FFFFFFFFFFFFULL)) + return foo (46); + if (in == (0x7642FFFFFFFFFFFFULL)) + return foo (51); + if (in == (0x7567000000ULL)) + return foo (9); + if (in == (0xFFF8567FFFFFFFFFULL)) + return foo (19); + + return 0; +} + +int __attribute__ ((noinline)) di_fun (long long in) +{ + if (in == (0x8642000000000000LL)) + return foo (1); + if (in == (0x7642000000000000LL)) + return foo (12); + if (in == (0x8000000000000000LL)) + return foo (32); + if (in == (0x8642FFFFFFFFFFFFLL)) + return foo (46); + if (in == (0x7642FFFFFFFFFFFFLL)) + return foo (51); + return 0; +} + +int +main () +{ + int e = 0; + if (udi_fun (6) != 0) + e++; + if (udi_fun (0x8642000000000000ULL) != foo (1)) + e++; + if (udi_fun (0x7642000000000000ULL) != foo (12)) + e++; + if (udi_fun (0x8000000000000000ULL) != foo (32)) + e++; + if (udi_fun (0x8642FFFFFFFFFFFFULL) != foo (46)) + e++; + if (udi_fun (0x7642FFFFFFFFFFFFULL) != foo (51)) + e++; + if (udi_fun (0x7567000000ULL) != foo (9)) + e++; + if (udi_fun (0xFFF8567FFFFFFFFFULL) != foo (19)) + e++; + + if (di_fun (6) != 0) + e++; + if (di_fun (0x8642000000000000LL) != foo (1)) + e++; + if (di_fun (0x7642000000000000LL) != foo (12)) + e++; + if (di_fun (0x8000000000000000LL) != foo (32)) + e++; + if (di_fun (0x8642FFFFFFFFFFFFLL) != foo (46)) + e++; + if (di_fun (0x7642FFFFFFFFFFFFLL) != foo (51)) + e++; + if (udi_fun (0x7567000000LL) != foo (9)) + e++; + if (udi_fun (0xFFF8567FFFFFFFFFLL) != foo (19)) + e++; + + if (e) + __builtin_abort (); + return 0; +} + -- 2.25.1