While looking into some popcount-related issues, I noticed that the popcount cost is not modeled at all. This adds both the vector and scalar (for CSSC) costs. For CSSC, we default to `COSTS_N_INSNS (2)` based on Ampere1B's cycle count, as found in LLVM's model.
Built and tested for aarch64-linux-gnu. Built also arm-linux-eabi because of the shared structure. PR target/114224 gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_rtx_costs): Handle POPCOUNT. * config/arm/aarch-common-protos.h (struct alu_cost_table): Add pop field. * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs, thunderx_extra_costs, thunderx2t99_extra_costs, thunderx3t110_extra_costs, tsv110_extra_costs, a64fx_extra_costs, ampere1_extra_costs, ampere1a_extra_costs, ampere1b_extra_costs): Update for pop field. * config/arm/aarch-cost-tables.h (generic_extra_costs, cortexa53_extra_costs, cortexa57_extra_costs, cortexa76_extra_costs, exynosm1_extra_costs, xgene1_extra_costs): Likewise. * config/arm/arm.cc (cortexa9_extra_costs, cortexa8_extra_costs, cortexa5_extra_costs, cortexa7_extra_costs, cortexa12_extra_costs, cortexa15_extra_costs, v7m_extra_costs): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/popcnt11.c: New test. * gcc.target/aarch64/popcnt12.c: New test. Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> --- gcc/config/aarch64/aarch64-cost-tables.h | 9 +++++ gcc/config/aarch64/aarch64.cc | 20 +++++++++++ gcc/config/arm/aarch-common-protos.h | 1 + gcc/config/arm/aarch-cost-tables.h | 6 ++++ gcc/config/arm/arm.cc | 7 ++++ gcc/testsuite/gcc.target/aarch64/popcnt11.c | 37 +++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/popcnt12.c | 37 +++++++++++++++++++++ 7 files changed, 117 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt11.c create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt12.c diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index 7c794916117..a9005d02d4e 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -42,6 +42,7 @@ const struct cpu_cost_table qdf24xx_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. 
*/ true /* non_exec_costs_exec. */ }, @@ -150,6 +151,7 @@ const struct cpu_cost_table thunderx_extra_costs = 0, /* Bfx. */ COSTS_N_INSNS (5), /* Clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* UNUSED: non_exec. */ false /* UNUSED: non_exec_costs_exec. */ }, @@ -257,6 +259,7 @@ const struct cpu_cost_table thunderx2t99_extra_costs = 0, /* Bfx. */ COSTS_N_INSNS (3), /* Clz. */ 0, /* Rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* Non_exec. */ true /* Non_exec_costs_exec. */ }, @@ -364,6 +367,7 @@ const struct cpu_cost_table thunderx3t110_extra_costs = 0, /* Bfx. */ COSTS_N_INSNS (3), /* Clz. */ 0, /* Rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* Non_exec. */ true /* Non_exec_costs_exec. */ }, @@ -471,6 +475,7 @@ const struct cpu_cost_table tsv110_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -579,6 +584,7 @@ const struct cpu_cost_table a64fx_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -686,6 +692,7 @@ const struct cpu_cost_table ampere1_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -793,6 +800,7 @@ const struct cpu_cost_table ampere1a_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -900,6 +908,7 @@ const struct cpu_cost_table ampere1b_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. 
*/ }, diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 7607b85e3cf..c881feaab96 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -14411,6 +14411,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, return false; + case POPCOUNT: + if (speed) + { + if (VECTOR_MODE_P (mode)) + *cost += extra_cost->vect.alu; + else if (TARGET_CSSC) + *cost += extra_cost->alu.pop; + else + { + /* POPCOUNT V8QI cost */ + *cost += extra_cost->vect.alu; + /* Reduction if needed (except QImode). */ + if (mode != QImode) + *cost += COSTS_N_INSNS (1) + extra_cost->vect.alu; + } + } + + return false; + case CTZ: if (VECTOR_MODE_P (mode)) { @@ -31223,3 +31242,4 @@ aarch64_libgcc_floating_mode_supported_p struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-aarch64.h" +ÀÀÀÀÀÀÀ \ No newline at end of file diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 9849fcbc098..c5db27be83f 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -70,6 +70,7 @@ struct alu_cost_table const int bfx; /* Bit-field extraction. */ const int clz; /* Count Leading Zeros. */ const int rev; /* Reverse bits/bytes. */ + const int pop; /* Population count. */ const int non_exec; /* Extra cost when not executing insn. */ const bool non_exec_costs_exec; /* True if non-execution must add the exec cost. */ diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index 56297f87f69..49bf9e071cc 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -40,6 +40,7 @@ const struct cpu_cost_table generic_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ COSTS_N_INSNS (1), /* non_exec. */ false /* non_exec_costs_exec. */ }, @@ -147,6 +148,7 @@ const struct cpu_cost_table cortexa53_extra_costs = COSTS_N_INSNS (1), /* bfx. */ 0, /* clz. */ 0, /* rev. 
*/ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -254,6 +256,7 @@ const struct cpu_cost_table cortexa57_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -361,6 +364,7 @@ const struct cpu_cost_table cortexa76_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -468,6 +472,7 @@ const struct cpu_cost_table exynosm1_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -575,6 +580,7 @@ const struct cpu_cost_table xgene1_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index d54564a6c35..2f5912dcd01 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -1114,6 +1114,7 @@ const struct cpu_cost_table cortexa9_extra_costs = COSTS_N_INSNS (1), /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1221,6 +1222,7 @@ const struct cpu_cost_table cortexa8_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1328,6 +1330,7 @@ const struct cpu_cost_table cortexa5_extra_costs = COSTS_N_INSNS (1), /* bfx. */ COSTS_N_INSNS (1), /* clz. */ COSTS_N_INSNS (1), /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1437,6 +1440,7 @@ const struct cpu_cost_table cortexa7_extra_costs = COSTS_N_INSNS (1), /* bfx. */ COSTS_N_INSNS (1), /* clz. */ COSTS_N_INSNS (1), /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. 
*/ }, @@ -1545,6 +1549,7 @@ const struct cpu_cost_table cortexa12_extra_costs = COSTS_N_INSNS (1), /* bfx. */ COSTS_N_INSNS (1), /* clz. */ COSTS_N_INSNS (1), /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1652,6 +1657,7 @@ const struct cpu_cost_table cortexa15_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1759,6 +1765,7 @@ const struct cpu_cost_table v7m_extra_costs = 0, /* bfx. */ 0, /* clz. */ 0, /* rev. */ + COSTS_N_INSNS (2), /* pop. */ COSTS_N_INSNS (1), /* non_exec. */ false /* non_exec_costs_exec. */ }, diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt11.c b/gcc/testsuite/gcc.target/aarch64/popcnt11.c new file mode 100644 index 00000000000..2810cbc0826 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/popcnt11.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +/* PR target/114224 */ + +#pragma GCC target "+nocssc" + +/* popcount==1 should be expanded using the `(arg ^ (arg - 1)) > arg - 1` + trick without CSSC with generic tuning. 
*/ + +/* +** fi: +** sub w([0-9]+), w0, #1 +** eor w([0-9]+), w0, w\1 +** cmp w\2, w\1 +** cset w0, hi +** ret +*/ + +int fi(unsigned a) +{ + return __builtin_popcountg(a) == 1; +} + +/* +** fll: +** sub x([0-9]+), x0, #1 +** eor x([0-9]+), x0, x\1 +** cmp x\2, x\1 +** cset w0, hi +** ret +*/ + +int fll(unsigned long long a) +{ + return __builtin_popcountg(a) == 1; +} diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt12.c b/gcc/testsuite/gcc.target/aarch64/popcnt12.c new file mode 100644 index 00000000000..ff980887e56 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/popcnt12.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +/* PR target/114224 */ + +#pragma GCC target "+cssc" + +/* popcount==1 should be expanded using the `(arg ^ (arg - 1)) > arg - 1` + trick even with CSSC enabled with generic tuning. */ + +/* +** fi: +** sub w([0-9]+), w0, #1 +** eor w([0-9]+), w0, w\1 +** cmp w\2, w\1 +** cset w0, hi +** ret +*/ + +int fi(unsigned a) +{ + return __builtin_popcountg(a) == 1; +} + +/* +** fll: +** sub x([0-9]+), x0, #1 +** eor x([0-9]+), x0, x\1 +** cmp x\2, x\1 +** cset w0, hi +** ret +*/ + +int fll(unsigned long long a) +{ + return __builtin_popcountg(a) == 1; +} -- 2.43.0