https://gcc.gnu.org/g:f87b632f98dc6d5cf8ed75d57d15adb8d7ffea6e

commit r16-2382-gf87b632f98dc6d5cf8ed75d57d15adb8d7ffea6e
Author: Kyrylo Tkachov <ktkac...@nvidia.com>
Date:   Thu Jul 17 06:06:16 2025 -0700

    aarch64: NFC - Make vec_* rtx costing logic consistent
    
    The rtx costs logic for CONST_VECTOR, VEC_DUPLICATE and VEC_SELECT sets
    the cost unconditionally to the movi, dup or extract fields of extra_cost,
    when the normal practice in that function is to use extra_cost only when speed
    is set.  When speed is false the function should estimate the size cost only.
    This patch makes the logic consistent by using the extra_cost fields to
    increment the cost when speed is set.  This requires reducing the extra_cost values
    of the movi, dup and extract fields by COSTS_N_INSNS (1), as every insn being costed
    has a cost of COSTS_N_INSNS (1) at the start of the function.  The cost tables for
    the CPUs are updated in line with this.
    
    With these changes the testsuite is unaffected so no different costing
    decisions are made and this patch is just a cleanup.
    
    Bootstrapped and tested on aarch64-none-linux-gnu.
    
    Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
    
    gcc/
    
            * config/aarch64/aarch64.cc (aarch64_rtx_costs): Add extra_cost values
            only when speed is true for CONST_VECTOR, VEC_DUPLICATE, VEC_SELECT
            cases.
            * config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs,
            thunderx_extra_costs, thunderx2t99_extra_costs,
            thunderx3t110_extra_costs, tsv110_extra_costs, a64fx_extra_costs,
            ampere1_extra_costs, ampere1a_extra_costs, ampere1b_extra_costs):
            Reduce cost of movi, dup, extract fields by COSTS_N_INSNS (1).
            * config/arm/aarch-cost-tables.h (generic_extra_costs,
            cortexa53_extra_costs, cortexa57_extra_costs, cortexa76_extra_costs,
            exynosm1_extra_costs, xgene1_extra_costs): Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-cost-tables.h | 54 ++++++++++++++++----------------
 gcc/config/aarch64/aarch64.cc            | 33 +++++++++++--------
 gcc/config/arm/aarch-cost-tables.h       | 36 ++++++++++-----------
 3 files changed, 65 insertions(+), 58 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index c49ff7f62ef2..e7926eb4a0e4 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -125,9 +125,9 @@ const struct cpu_cost_table qdf24xx_extra_costs =
   {
     COSTS_N_INSNS (1),  /* alu.  */
     COSTS_N_INSNS (4),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -233,9 +233,9 @@ const struct cpu_cost_table thunderx_extra_costs =
   {
     COSTS_N_INSNS (1), /* Alu.  */
     COSTS_N_INSNS (4), /* mult.  */
-    COSTS_N_INSNS (1), /* movi.  */
-    COSTS_N_INSNS (2), /* dup.  */
-    COSTS_N_INSNS (2)  /* extract.  */
+    COSTS_N_INSNS (0), /* movi.  */
+    COSTS_N_INSNS (1), /* dup.  */
+    COSTS_N_INSNS (1)  /* extract.  */
   }
 };
 
@@ -340,9 +340,9 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
   {
     COSTS_N_INSNS (1), /* Alu.  */
     COSTS_N_INSNS (4), /* Mult.  */
-    COSTS_N_INSNS (1), /* movi.  */
-    COSTS_N_INSNS (2), /* dup.  */
-    COSTS_N_INSNS (2)  /* extract.  */
+    COSTS_N_INSNS (0), /* movi.  */
+    COSTS_N_INSNS (1), /* dup.  */
+    COSTS_N_INSNS (1)  /* extract.  */
   }
 };
 
@@ -447,9 +447,9 @@ const struct cpu_cost_table thunderx3t110_extra_costs =
   {
     COSTS_N_INSNS (1), /* Alu.  */
     COSTS_N_INSNS (4), /* Mult.  */
-    COSTS_N_INSNS (1), /* movi.  */
-    COSTS_N_INSNS (2), /* dup.  */
-    COSTS_N_INSNS (2)  /* extract.  */
+    COSTS_N_INSNS (0), /* movi.  */
+    COSTS_N_INSNS (1), /* dup.  */
+    COSTS_N_INSNS (1)  /* extract.  */
   }
 };
 
@@ -555,9 +555,9 @@ const struct cpu_cost_table tsv110_extra_costs =
   {
     COSTS_N_INSNS (1),  /* alu.  */
     COSTS_N_INSNS (4),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -662,9 +662,9 @@ const struct cpu_cost_table a64fx_extra_costs =
   {
     COSTS_N_INSNS (1),  /* alu.  */
     COSTS_N_INSNS (4),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -769,9 +769,9 @@ const struct cpu_cost_table ampere1_extra_costs =
   {
     COSTS_N_INSNS (3),  /* alu.  */
     COSTS_N_INSNS (3),  /* mult.  */
-    COSTS_N_INSNS (2),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (1),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -876,9 +876,9 @@ const struct cpu_cost_table ampere1a_extra_costs =
   {
     COSTS_N_INSNS (3),  /* alu.  */
     COSTS_N_INSNS (3),  /* mult.  */
-    COSTS_N_INSNS (2),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (1),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -983,9 +983,9 @@ const struct cpu_cost_table ampere1b_extra_costs =
   {
     COSTS_N_INSNS (1),  /* alu.  */
     COSTS_N_INSNS (2),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (1),  /* dup.  */
-    COSTS_N_INSNS (1)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (0),  /* dup.  */
+    COSTS_N_INSNS (0)   /* extract.  */
   }
 };
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 0485f695941c..72a691b8e2f2 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -15854,11 +15854,14 @@ cost_plus:
        break;
     case CONST_VECTOR:
        {
-         /* Load using MOVI/MVNI.  */
-         if (aarch64_simd_valid_mov_imm (x))
-           *cost = extra_cost->vect.movi;
-         else /* Load using constant pool.  */
-           *cost = extra_cost->ldst.load;
+         if (speed)
+           {
+             /* Load using MOVI/MVNI.  */
+             if (aarch64_simd_valid_mov_imm (x))
+               *cost += extra_cost->vect.movi;
+             else /* Load using constant pool.  */
+               *cost += extra_cost->ldst.load;
+           }
          break;
        }
     case VEC_CONCAT:
@@ -15867,7 +15870,8 @@ cost_plus:
        break;
     case VEC_DUPLICATE:
        /* Load using a DUP.  */
-       *cost = extra_cost->vect.dup;
+       if (speed)
+         *cost += extra_cost->vect.dup;
        return false;
     case VEC_SELECT:
        {
@@ -15875,13 +15879,16 @@ cost_plus:
          *cost = rtx_cost (op0, GET_MODE (op0), VEC_SELECT, 0, speed);
 
          /* cost subreg of 0 as free, otherwise as DUP */
-         rtx op1 = XEXP (x, 1);
-         if (vec_series_lowpart_p (mode, GET_MODE (op1), op1))
-           ;
-         else if (vec_series_highpart_p (mode, GET_MODE (op1), op1))
-           *cost = extra_cost->vect.dup;
-         else
-           *cost = extra_cost->vect.extract;
+         if (speed)
+           {
+             rtx op1 = XEXP (x, 1);
+             if (vec_series_lowpart_p (mode, GET_MODE (op1), op1))
+               ;
+             else if (vec_series_highpart_p (mode, GET_MODE (op1), op1))
+               *cost += extra_cost->vect.dup;
+             else
+               *cost += extra_cost->vect.extract;
+           }
          return true;
        }
     default:
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index c7a14b3750d8..0600e590089b 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -123,9 +123,9 @@ const struct cpu_cost_table generic_extra_costs =
   {
     COSTS_N_INSNS (1), /* alu.  */
     COSTS_N_INSNS (4),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -230,9 +230,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
   {
     COSTS_N_INSNS (1), /* alu.  */
     COSTS_N_INSNS (4),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -337,9 +337,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
   {
     COSTS_N_INSNS (1),  /* alu.  */
     COSTS_N_INSNS (4),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -444,9 +444,9 @@ const struct cpu_cost_table cortexa76_extra_costs =
   {
     COSTS_N_INSNS (1),  /* alu.  */
     COSTS_N_INSNS (4),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -551,9 +551,9 @@ const struct cpu_cost_table exynosm1_extra_costs =
   {
     COSTS_N_INSNS (0),  /* alu.  */
     COSTS_N_INSNS (4),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };
 
@@ -658,9 +658,9 @@ const struct cpu_cost_table xgene1_extra_costs =
   {
     COSTS_N_INSNS (2),  /* alu.  */
     COSTS_N_INSNS (8),  /* mult.  */
-    COSTS_N_INSNS (1),  /* movi.  */
-    COSTS_N_INSNS (2),  /* dup.  */
-    COSTS_N_INSNS (2)   /* extract.  */
+    COSTS_N_INSNS (0),  /* movi.  */
+    COSTS_N_INSNS (1),  /* dup.  */
+    COSTS_N_INSNS (1)   /* extract.  */
   }
 };

Reply via email to