Hi,

This patch adds predicate move costs to several SVE-enabled cores.


2022-02-25  Tamar Christina  <tamar.christ...@arm.com>
                       Andre Vieira <andre.simoesdiasvie...@arm.com>

gcc/ChangeLog:

        * config/aarch64/aarch64-protos.h (struct cpu_regmove_cost): Add PR2PR
        member.
        * config/aarch64/aarch64.cc (aarch64_register_move_cost): Use PR2PR
        costs when moving a predicate.
        (generic_regmove_cost, cortexa57_regmove_cost, cortexa53_regmove_cost,
        exynosm1_regmove_cost, thunderx_regmove_cost, xgene1_regmove_cost,
        qdf24xx_regmove_cost, thunderx2t99_regmove_cost,
        thunderx3t110_regmove_cost, tsv110_regmove_cost, a64fx_regmove_cost):
        Add PR2PR entry.
        (neoversen1_regmove_cost): New.
        (neoversen1_tunings): Use neoversen1_regmove_cost.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index d0e78d6a559a7c310b7f8c7877081a0e2baf6a05..f2fde35c6eb4989af8736db8fad004171c160282 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -192,6 +192,7 @@ struct cpu_regmove_cost
   const int GP2FP;
   const int FP2GP;
   const int FP2FP;
+  const int PR2PR;
 };
 
 struct simd_vec_cost
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index dbeaaf484dbc070ae3fcc08530ec9bd20b8ab651..9a94f3a30b0f1acc3c9b8a0e3d703e60780d0cbc 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -526,7 +526,8 @@ static const struct cpu_regmove_cost generic_regmove_cost =
      their cost higher than memmov_cost.  */
   5, /* GP2FP  */
   5, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost cortexa57_regmove_cost =
@@ -536,7 +537,8 @@ static const struct cpu_regmove_cost cortexa57_regmove_cost =
      their cost higher than memmov_cost.  */
   5, /* GP2FP  */
   5, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost cortexa53_regmove_cost =
@@ -546,7 +548,8 @@ static const struct cpu_regmove_cost cortexa53_regmove_cost =
      their cost higher than memmov_cost.  */
   5, /* GP2FP  */
   5, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost exynosm1_regmove_cost =
@@ -556,7 +559,8 @@ static const struct cpu_regmove_cost exynosm1_regmove_cost =
      their cost higher than memmov_cost (actual, 4 and 9).  */
   9, /* GP2FP  */
   9, /* FP2GP  */
-  1 /* FP2FP  */
+  1, /* FP2FP  */
+  1 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost thunderx_regmove_cost =
@@ -564,7 +568,8 @@ static const struct cpu_regmove_cost thunderx_regmove_cost =
   2, /* GP2GP  */
   2, /* GP2FP  */
   6, /* FP2GP  */
-  4 /* FP2FP  */
+  4, /* FP2FP  */
+  4 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost xgene1_regmove_cost =
@@ -574,7 +579,8 @@ static const struct cpu_regmove_cost xgene1_regmove_cost =
      their cost higher than memmov_cost.  */
   8, /* GP2FP  */
   8, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost qdf24xx_regmove_cost =
@@ -583,7 +589,8 @@ static const struct cpu_regmove_cost qdf24xx_regmove_cost =
   /* Avoid the use of int<->fp moves for spilling.  */
   6, /* GP2FP  */
   6, /* FP2GP  */
-  4 /* FP2FP  */
+  4, /* FP2FP  */
+  4 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
@@ -593,6 +600,7 @@ static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
   5, /* GP2FP  */
   6, /* FP2GP  */
   3, /* FP2FP  */
+  3 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost thunderx3t110_regmove_cost =
@@ -601,7 +609,8 @@ static const struct cpu_regmove_cost thunderx3t110_regmove_cost =
   /* Avoid the use of int<->fp moves for spilling.  */
   4, /* GP2FP  */
   5, /* FP2GP  */
-  4  /* FP2FP  */
+  4,  /* FP2FP  */
+  4 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost tsv110_regmove_cost =
@@ -611,7 +620,8 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
      their cost higher than memmov_cost.  */
   2, /* GP2FP  */
   3, /* FP2GP  */
-  2  /* FP2FP  */
+  2,  /* FP2FP  */
+  2 /* PR2PR.  */
 };
 
 static const struct cpu_regmove_cost a64fx_regmove_cost =
@@ -621,7 +631,19 @@ static const struct cpu_regmove_cost a64fx_regmove_cost =
      their cost higher than memmov_cost.  */
   5, /* GP2FP  */
   7, /* FP2GP  */
-  2 /* FP2FP  */
+  2, /* FP2FP  */
+  2 /* PR2PR.  */
+};
+
+static const struct cpu_regmove_cost neoversen1_regmove_cost =
+{
+  1, /* GP2GP  */
+  /* Spilling to int<->fp instead of memory is recommended so set
+     realistic costs compared to memmov_cost.  */
+  3, /* GP2FP  */
+  2, /* FP2GP  */
+  2, /* FP2FP  */
+  1 /* PR2PR.  */
 };
 
 /* Generic costs for Advanced SIMD vector operations.   */
@@ -1698,7 +1720,7 @@ static const struct tune_params neoversen1_tunings =
 {
   &cortexa76_extra_costs,
   &generic_addrcost_table,
-  &generic_regmove_cost,
+  &neoversen1_regmove_cost,
   &cortexa57_vector_cost,
   &generic_branch_cost,
   &generic_approx_modes,
@@ -14438,6 +14460,11 @@ aarch64_register_move_cost (machine_mode mode,
       || (to == GENERAL_REGS && from == STACK_REG))
     return regmove_cost->GP2GP;
 
+  /* Predicate to predicate moves are usually very cheap so cost them
+     separately.  */
+  if (from == PR_REGS && to == PR_REGS)
+    return regmove_cost->PR2PR;
+
   /* To/From the stack register, we move via the gprs.  */
   if (to == STACK_REG || from == STACK_REG)
     return aarch64_register_move_cost (mode, from, GENERAL_REGS)

Reply via email to