While looking into some popcount-related issues, I noticed that the popcount
cost is not modeled at all. This adds both the vector and scalar (for CSSC)
costs. For CSSC, we default to an extra cost of `COSTS_N_INSNS (2)`, based on
the Ampere1B's cycle count that is found from LLVM's model.

Built and tested for aarch64-linux-gnu.
Also built for arm-linux-eabi because the cost table structure is shared.

        PR target/114224

gcc/ChangeLog:

        * config/aarch64/aarch64.cc (aarch64_rtx_costs): Handle POPCOUNT.
        * config/arm/aarch-common-protos.h (struct alu_cost_table): Add pop
        field.
        * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs,
        thunderx_extra_costs,
        thunderx2t99_extra_costs, thunderx3t110_extra_costs,
        tsv110_extra_costs, a64fx_extra_costs,
        ampere1_extra_costs, ampere1a_extra_costs,
        ampere1b_extra_costs): Update for pop field.
        * config/arm/aarch-cost-tables.h (generic_extra_costs,
        cortexa53_extra_costs,
        cortexa57_extra_costs, cortexa76_extra_costs, exynosm1_extra_costs,
        xgene1_extra_costs): Likewise.
        * config/arm/arm.cc (cortexa9_extra_costs, cortexa8_extra_costs,
        cortexa5_extra_costs, cortexa7_extra_costs, cortexa12_extra_costs,
        cortexa15_extra_costs, v7m_extra_costs): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/popcnt11.c: New test.
        * gcc.target/aarch64/popcnt12.c: New test.

Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
---
 gcc/config/aarch64/aarch64-cost-tables.h    |  9 +++++
 gcc/config/aarch64/aarch64.cc               | 20 +++++++++++
 gcc/config/arm/aarch-common-protos.h        |  1 +
 gcc/config/arm/aarch-cost-tables.h          |  6 ++++
 gcc/config/arm/arm.cc                       |  7 ++++
 gcc/testsuite/gcc.target/aarch64/popcnt11.c | 37 +++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/popcnt12.c | 37 +++++++++++++++++++++
 7 files changed, 117 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt11.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt12.c

diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 7c794916117..a9005d02d4e 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -42,6 +42,7 @@ const struct cpu_cost_table qdf24xx_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                /* rev.  */
+    COSTS_N_INSNS (2), /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -150,6 +151,7 @@ const struct cpu_cost_table thunderx_extra_costs =
     0,                 /* Bfx.  */
     COSTS_N_INSNS (5), /* Clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* UNUSED: non_exec.  */
     false              /* UNUSED: non_exec_costs_exec.  */
   },
@@ -257,6 +259,7 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
     0,                 /* Bfx.  */
     COSTS_N_INSNS (3), /* Clz.  */
     0,                 /* Rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* Non_exec.  */
     true               /* Non_exec_costs_exec.  */
   },
@@ -364,6 +367,7 @@ const struct cpu_cost_table thunderx3t110_extra_costs =
     0,                 /* Bfx.  */
     COSTS_N_INSNS (3), /* Clz.  */
     0,                 /* Rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* Non_exec.  */
     true               /* Non_exec_costs_exec.  */
   },
@@ -471,6 +475,7 @@ const struct cpu_cost_table tsv110_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -579,6 +584,7 @@ const struct cpu_cost_table a64fx_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2), /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -686,6 +692,7 @@ const struct cpu_cost_table ampere1_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2), /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -793,6 +800,7 @@ const struct cpu_cost_table ampere1a_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2), /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -900,6 +908,7 @@ const struct cpu_cost_table ampere1b_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2), /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 7607b85e3cf..c881feaab96 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14411,6 +14411,25 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
 
       return false;
 
+    case POPCOUNT:
+      if (speed)
+       {
+         if (VECTOR_MODE_P (mode))
+           *cost += extra_cost->vect.alu;
+         else if (TARGET_CSSC)
+           *cost += extra_cost->alu.pop;
+         else
+           {
+             /* POPCOUNT V8QI cost */
+             *cost += extra_cost->vect.alu;
+             /* Reduction if needed (except QImode). */
+             if (mode != QImode)
+               *cost += COSTS_N_INSNS (1) + extra_cost->vect.alu;
+           }
+       }
+
+      return false;
+
     case CTZ:
       if (VECTOR_MODE_P (mode))
        {
@@ -31223,3 +31242,4 @@ aarch64_libgcc_floating_mode_supported_p
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"
+
\ No newline at end of file
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 9849fcbc098..c5db27be83f 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -70,6 +70,7 @@ struct alu_cost_table
   const int bfx;               /* Bit-field extraction.  */
   const int clz;               /* Count Leading Zeros.  */
   const int rev;               /* Reverse bits/bytes.  */
+  const int pop;               /* Population count.  */
   const int non_exec;          /* Extra cost when not executing insn.  */
   const bool non_exec_costs_exec; /* True if non-execution must add the exec
                                     cost.  */
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index 56297f87f69..49bf9e071cc 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -40,6 +40,7 @@ const struct cpu_cost_table generic_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     COSTS_N_INSNS (1), /* non_exec.  */
     false              /* non_exec_costs_exec.  */
   },
@@ -147,6 +148,7 @@ const struct cpu_cost_table cortexa53_extra_costs =
     COSTS_N_INSNS (1), /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -254,6 +256,7 @@ const struct cpu_cost_table cortexa57_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -361,6 +364,7 @@ const struct cpu_cost_table cortexa76_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                  /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -468,6 +472,7 @@ const struct cpu_cost_table exynosm1_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -575,6 +580,7 @@ const struct cpu_cost_table xgene1_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index d54564a6c35..2f5912dcd01 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -1114,6 +1114,7 @@ const struct cpu_cost_table cortexa9_extra_costs =
     COSTS_N_INSNS (1), /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -1221,6 +1222,7 @@ const struct cpu_cost_table cortexa8_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -1328,6 +1330,7 @@ const struct cpu_cost_table cortexa5_extra_costs =
     COSTS_N_INSNS (1), /* bfx.  */
     COSTS_N_INSNS (1), /* clz.  */
     COSTS_N_INSNS (1), /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -1437,6 +1440,7 @@ const struct cpu_cost_table cortexa7_extra_costs =
     COSTS_N_INSNS (1), /* bfx.  */
     COSTS_N_INSNS (1), /* clz.  */
     COSTS_N_INSNS (1), /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -1545,6 +1549,7 @@ const struct cpu_cost_table cortexa12_extra_costs =
     COSTS_N_INSNS (1), /* bfx.  */
     COSTS_N_INSNS (1), /* clz.  */
     COSTS_N_INSNS (1), /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -1652,6 +1657,7 @@ const struct cpu_cost_table cortexa15_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     0,                 /* non_exec.  */
     true               /* non_exec_costs_exec.  */
   },
@@ -1759,6 +1765,7 @@ const struct cpu_cost_table v7m_extra_costs =
     0,                 /* bfx.  */
     0,                 /* clz.  */
     0,                 /* rev.  */
+    COSTS_N_INSNS (2),  /* pop.  */
     COSTS_N_INSNS (1), /* non_exec.  */
     false              /* non_exec_costs_exec.  */
   },
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt11.c b/gcc/testsuite/gcc.target/aarch64/popcnt11.c
new file mode 100644
index 00000000000..2810cbc0826
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt11.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/114224 */
+
+#pragma GCC target "+nocssc"
+
+/* popcount==1 should be expanded using the `(arg ^ (arg - 1)) > arg - 1`
+   trick without CSSC with generic tuning. */
+
+/*
+** fi:
+**     sub     w([0-9]+), w0, #1
+**     eor     w([0-9]+), w0, w\1
+**     cmp     w\2, w\1
+**     cset    w0, hi
+**     ret
+*/
+
+int fi(unsigned a)
+{
+  return __builtin_popcountg(a) == 1;
+}
+
+/*
+** fll:
+**     sub     x([0-9]+), x0, #1
+**     eor     x([0-9]+), x0, x\1
+**     cmp     x\2, x\1
+**     cset    w0, hi
+**     ret
+*/
+
+int fll(unsigned long long a)
+{
+  return __builtin_popcountg(a) == 1;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt12.c b/gcc/testsuite/gcc.target/aarch64/popcnt12.c
new file mode 100644
index 00000000000..ff980887e56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt12.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR target/114224 */
+
+#pragma GCC target "+cssc"
+
+/* popcount==1 should be expanded using the `(arg ^ (arg - 1)) > arg - 1`
+   trick even with CSSC enabled with generic tuning. */
+
+/*
+** fi:
+**     sub     w([0-9]+), w0, #1
+**     eor     w([0-9]+), w0, w\1
+**     cmp     w\2, w\1
+**     cset    w0, hi
+**     ret
+*/
+
+int fi(unsigned a)
+{
+  return __builtin_popcountg(a) == 1;
+}
+
+/*
+** fll:
+**     sub     x([0-9]+), x0, #1
+**     eor     x([0-9]+), x0, x\1
+**     cmp     x\2, x\1
+**     cset    w0, hi
+**     ret
+*/
+
+int fll(unsigned long long a)
+{
+  return __builtin_popcountg(a) == 1;
+}
-- 
2.43.0

Reply via email to