Add Power10 scheduling description.

This patch adds the Power10 scheduling description. Since power10.md was pretty 
much a complete rewrite (existing version of power10.md is mostly just a copy 
of power9.md), I diffed power10.md with /dev/null so that the full contents of 
the file are shown as opposed to a diff. This should make it easier to read. 
This patch will not apply to current trunk for that reason.
 
Bootstrapped and regression-tested on powerpc64le (Power8/Power10) with no
new regressions. Ok for trunk?

-Pat


2020-11-13  Pat Haugen  <pthau...@linux.ibm.com>

gcc/
        * config/rs6000/rs6000.c (power10_cost): New.
        (rs6000_option_override_internal): Set Power10 costs.
        (rs6000_issue_rate): Set Power10 issue rate.
        * config/rs6000/power10.md: Rewrite for Power10.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4d528a39a37..85bb42d6dce 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1080,6 +1080,26 @@ struct processor_costs power9_cost = {
   COSTS_N_INSNS (3),   /* SF->DF convert */
 };
 
+/* Instruction costs on POWER10 processors.  */
+static const
+struct processor_costs power10_cost = {
+  COSTS_N_INSNS (1),   /* mulsi */
+  COSTS_N_INSNS (1),   /* mulsi_const */
+  COSTS_N_INSNS (1),   /* mulsi_const9 */
+  COSTS_N_INSNS (1),   /* muldi */
+  COSTS_N_INSNS (4),   /* divsi */
+  COSTS_N_INSNS (4),   /* divdi */
+  COSTS_N_INSNS (2),   /* fp */
+  COSTS_N_INSNS (2),   /* dmul */
+  COSTS_N_INSNS (7),   /* sdiv */
+  COSTS_N_INSNS (9),   /* ddiv */
+  128,                 /* cache line size */
+  32,                  /* l1 cache */
+  512,                 /* l2 cache */
+  16,                  /* prefetch streams */
+  COSTS_N_INSNS (2),   /* SF->DF convert */
+};
+
 /* Instruction costs on POWER A2 processors.  */
 static const
 struct processor_costs ppca2_cost = {
@@ -4734,10 +4754,13 @@ rs6000_option_override_internal (bool global_init_p)
        break;
 
       case PROCESSOR_POWER9:
-      case PROCESSOR_POWER10:
        rs6000_cost = &power9_cost;
        break;
 
+      case PROCESSOR_POWER10:
+       rs6000_cost = &power10_cost;
+       break;
+
       case PROCESSOR_PPCA2:
        rs6000_cost = &ppca2_cost;
        break;
@@ -18001,8 +18024,9 @@ rs6000_issue_rate (void)
   case PROCESSOR_POWER8:
     return 7;
   case PROCESSOR_POWER9:
-  case PROCESSOR_POWER10:
     return 6;
+  case PROCESSOR_POWER10:
+    return 8;
   default:
     return 1;
   }
diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
new file mode 100644
index 00000000000..f9ca4cbf10e
--- /dev/null
+++ b/gcc/config/rs6000/power10.md
@@ -0,0 +1,553 @@
+;; Scheduling description for the IBM POWER10 processor.
+;; Copyright (C) 2020 Free Software Foundation, Inc.
+;;
+;; Contributed by Pat Haugen (pthau...@us.ibm.com).
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+; For Power10 we model (and try to pack) the in-order decode/dispatch groups
+; which consist of 8 instructions max.  We do not try to model the details of
+; the out-of-order issue queues and how insns flow to the various execution
+; units except for the simple representation of the issue limitation of at
+; most 4 insns to the execution units/2 insns to the load units/2 insns to
+; the store units.
+(define_automaton "power10dsp,power10issue,power10div")
+
+; Decode/dispatch slots
+(define_cpu_unit "du0_power10,du1_power10,du2_power10,du3_power10,
+                 du4_power10,du5_power10,du6_power10,du7_power10" "power10dsp")
+
+; Four execution units
+(define_cpu_unit "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
+                "power10issue")
+; Two load units and two store units
+(define_cpu_unit "lu0_power10,lu1_power10" "power10issue")
+(define_cpu_unit "stu0_power10,stu1_power10" "power10issue")
+; Create false units for use by non-pipelined div/sqrt
+(define_cpu_unit "fx_div0_power10,fx_div1_power10" "power10div")
+(define_cpu_unit "fp_div0_power10,fp_div1_power10,fp_div2_power10,
+                 fp_div3_power10" "power10div")
+
+
+; Dispatch slots are allocated in order conforming to program order.
+(absence_set "du0_power10" "du1_power10,du2_power10,du3_power10,du4_power10,\
+              du5_power10,du6_power10,du7_power10")
+(absence_set "du1_power10" "du2_power10,du3_power10,du4_power10,du5_power10,\
+              du6_power10,du7_power10")
+(absence_set "du2_power10" "du3_power10,du4_power10,du5_power10,du6_power10,\
+             du7_power10")
+(absence_set "du3_power10" "du4_power10,du5_power10,du6_power10,du7_power10")
+(absence_set "du4_power10" "du5_power10,du6_power10,du7_power10")
+(absence_set "du5_power10" "du6_power10,du7_power10")
+(absence_set "du6_power10" "du7_power10")
+
+
+; Dispatch port reservations
+;
+; Power10 can dispatch a maximum of 8 iops per cycle, with a maximum of
+; 4 VSU/2 Load/2 Store per cycle.
+
+; Any dispatch slot
+(define_reservation "DU_any_power10"
+                   "du0_power10|du1_power10|du2_power10|du3_power10|
+                    du4_power10|du5_power10|du6_power10|du7_power10")
+
+; Even slot, actually takes even/odd slots
+(define_reservation "DU_even_power10"
+                   "du0_power10+du1_power10|du2_power10+du3_power10|
+                    du4_power10+du5_power10|du6_power10+du7_power10")
+
+; 4-way cracked (consumes whole decode/dispatch cycle)
+(define_reservation "DU_all_power10"
+                   "du0_power10+du1_power10+du2_power10+du3_power10+
+                    du4_power10+du5_power10+du6_power10+du7_power10")
+
+
+; Execution unit reservations
+(define_reservation "LU_power10"
+                   "lu0_power10|lu1_power10")
+
+(define_reservation "STU_power10"
+                   "stu0_power10|stu1_power10")
+
+; Certain simple fixed-point insns can execute in the Store-agen pipe
+(define_reservation "SXU_power10"
+                   "stu0_power10|stu1_power10")
+
+(define_reservation "EXU_power10"
+                   "exu0_power10|exu1_power10|exu2_power10|exu3_power10")
+
+(define_reservation "EXU_super_power10"
+                   "exu0_power10+exu1_power10|exu2_power10+exu3_power10")
+
+; Define the reservations to be used by div/sqrt which allows other insns
+; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
+(define_reservation "FX_DIV_power10"
+                    "fx_div0_power10*8|fx_div1_power10*8")
+(define_reservation "FP_DIVS_power10"
+                    "fp_div0_power10*5|fp_div1_power10*5|fp_div2_power10*5|
+                     fp_div3_power10*5")
+(define_reservation "FP_DIV_power10"
+                    "fp_div0_power10*7|fp_div1_power10*7|fp_div2_power10*7|
+                     fp_div3_power10*7")
+
+
+; Load Unit
+(define_insn_reservation "power10-load" 4
+  (and (eq_attr "type" "load")
+       (eq_attr "update" "no")
+       (eq_attr "size" "!128")
+       (eq_attr "prefixed" "no")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,LU_power10")
+
+(define_insn_reservation "power10-prefixed-load" 4
+  (and (eq_attr "type" "load")
+       (eq_attr "update" "no")
+       (eq_attr "size" "!128")
+       (eq_attr "prefixed" "!no")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,LU_power10")
+
+(define_insn_reservation "power10-load-update" 4
+  (and (eq_attr "type" "load")
+       (eq_attr "update" "yes")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,LU_power10+SXU_power10")
+
+(define_insn_reservation "power10-fpload-double" 4
+  (and (eq_attr "type" "fpload")
+       (eq_attr "update" "no")
+       (eq_attr "size" "64")
+       (eq_attr "prefixed" "no")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,LU_power10")
+
+(define_insn_reservation "power10-prefixed-fpload-double" 4
+  (and (eq_attr "type" "fpload")
+       (eq_attr "update" "no")
+       (eq_attr "size" "64")
+       (eq_attr "prefixed" "!no")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,LU_power10")
+
+(define_insn_reservation "power10-fpload-update-double" 4
+  (and (eq_attr "type" "fpload")
+       (eq_attr "update" "yes")
+       (eq_attr "size" "64")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,LU_power10+SXU_power10")
+
+; SFmode loads are cracked and have an additional 3 cycles of latency over
+; DFmode loads.  Prefixed forms behave the same.
+(define_insn_reservation "power10-fpload-single" 7
+  (and (eq_attr "type" "fpload")
+       (eq_attr "update" "no")
+       (eq_attr "size" "32")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,LU_power10")
+
+(define_insn_reservation "power10-fpload-update-single" 7
+  (and (eq_attr "type" "fpload")
+       (eq_attr "update" "yes")
+       (eq_attr "size" "32")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,LU_power10+SXU_power10")
+
+(define_insn_reservation "power10-vecload" 4
+  (and (eq_attr "type" "vecload")
+       (eq_attr "size" "!256")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,LU_power10")
+
+; lxvp
+(define_insn_reservation "power10-vecload-pair" 4
+  (and (eq_attr "type" "vecload")
+       (eq_attr "size" "256")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,LU_power10+SXU_power10")
+
+; Store Unit
+(define_insn_reservation "power10-store" 0
+  (and (eq_attr "type" "store,fpstore,vecstore")
+       (eq_attr "update" "no")
+       (eq_attr "prefixed" "no")
+       (eq_attr "size" "!128")
+       (eq_attr "size" "!256")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,STU_power10")
+
+(define_insn_reservation "power10-prefixed-store" 0
+  (and (eq_attr "type" "store,fpstore,vecstore")
+       (eq_attr "prefixed" "!no")
+       (eq_attr "size" "!128")
+       (eq_attr "size" "!256")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,STU_power10")
+
+; Update forms have 2 cycle latency for updated addr reg
+(define_insn_reservation "power10-store-update" 2
+  (and (eq_attr "type" "store,fpstore")
+       (eq_attr "update" "yes")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,STU_power10")
+
+; stxvp
+(define_insn_reservation "power10-vecstore-pair" 0
+  (and (eq_attr "type" "vecstore")
+       (eq_attr "size" "256")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,stu0_power10+stu1_power10")
+
+(define_insn_reservation "power10-larx" 4
+  (and (eq_attr "type" "load_l")
+       (eq_attr "size" "!128")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,LU_power10")
+
+; All load quad forms
+(define_insn_reservation "power10-lq" 4
+  (and (eq_attr "type" "load,load_l")
+       (eq_attr "size" "128")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,LU_power10+SXU_power10")
+
+(define_insn_reservation "power10-stcx" 0
+  (and (eq_attr "type" "store_c")
+       (eq_attr "size" "!128")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,STU_power10")
+
+; All store quad forms
+(define_insn_reservation "power10-stq" 0
+  (and (eq_attr "type" "store,store_c")
+       (eq_attr "size" "128")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,stu0_power10+stu1_power10")
+
+(define_insn_reservation "power10-sync" 1
+  (and (eq_attr "type" "sync,isync")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,STU_power10")
+
+
+; VSU Execution Unit
+
+; Fixed point ops
+
+; Most ALU insns are simple 2 cycle, including record form
+(define_insn_reservation "power10-alu" 2
+  (and (eq_attr "type" "add,exts,integer,logical,isel")
+       (eq_attr "prefixed" "no")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+; 4 cycle CR latency
+(define_bypass 4 "power10-alu"
+                "power10-crlogical,power10-mfcr,power10-mfcrf")
+
+; paddi
+(define_insn_reservation "power10-paddi" 2
+  (and (eq_attr "type" "add")
+       (eq_attr "prefixed" "!no")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_power10")
+
+; Rotate/shift (non-record form)
+(define_insn_reservation "power10-rot" 2
+  (and (eq_attr "type" "insert,shift")
+       (eq_attr "dot" "no")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+; Record form rotate/shift
+(define_insn_reservation "power10-rot-compare" 3
+  (and (eq_attr "type" "insert,shift")
+       (eq_attr "dot" "yes")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+; 5 cycle CR latency
+(define_bypass 5 "power10-rot-compare"
+                "power10-crlogical,power10-mfcr,power10-mfcrf")
+
+(define_insn_reservation "power10-alu2" 3
+  (and (eq_attr "type" "cntlz,popcnt,trap")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+; 5 cycle CR latency
+(define_bypass 5 "power10-alu2"
+                "power10-crlogical,power10-mfcr,power10-mfcrf")
+
+(define_insn_reservation "power10-cmp" 2
+  (and (eq_attr "type" "cmp")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+; Treat 'two' and 'three' types as 2 or 3 way cracked
+(define_insn_reservation "power10-two" 4
+  (and (eq_attr "type" "two")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_power10")
+
+(define_insn_reservation "power10-three" 6
+  (and (eq_attr "type" "three")
+       (eq_attr "cpu" "power10"))
+  "DU_all_power10,EXU_power10")
+
+(define_insn_reservation "power10-mul" 5
+  (and (eq_attr "type" "mul")
+       (eq_attr "dot" "no")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+; 4 cycle MUL->MUL latency
+(define_bypass 4 "power10-mul"
+                "power10-mul,power10-mul-compare")
+
+(define_insn_reservation "power10-mul-compare" 5
+  (and (eq_attr "type" "mul")
+       (eq_attr "dot" "yes")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_power10")
+; 4 cycle MUL->MUL latency
+(define_bypass 4 "power10-mul-compare"
+                "power10-mul,power10-mul-compare")
+; 7 cycle CR latency
+(define_bypass 7 "power10-mul-compare"
+                "power10-crlogical,power10-mfcr,power10-mfcrf")
+
+(define_insn_reservation "power10-div" 12
+  (and (eq_attr "type" "div")
+       (eq_attr "dot" "no")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10,FX_DIV_power10")
+
+(define_insn_reservation "power10-div-compare" 12
+  (and (eq_attr "type" "div")
+       (eq_attr "dot" "yes")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_power10,FX_DIV_power10")
+; 14 cycle CR latency
+(define_bypass 14 "power10-div-compare"
+                "power10-crlogical,power10-mfcr,power10-mfcrf")
+
+(define_insn_reservation "power10-crlogical" 2
+  (and (eq_attr "type" "cr_logical")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-mfcrf" 2
+  (and (eq_attr "type" "mfcrf")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-mfcr" 3
+  (and (eq_attr "type" "mfcr")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_power10")
+
+; Should differentiate between 1 cr field and > 1 since target of > 1 cr
+; is cracked
+(define_insn_reservation "power10-mtcr" 3
+  (and (eq_attr "type" "mtcr")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-mtjmpr" 3
+  (and (eq_attr "type" "mtjmpr")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-mfjmpr" 2
+  (and (eq_attr "type" "mfjmpr")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+
+; Floating point/Vector ops
+
+(define_insn_reservation "power10-fpsimple" 3
+  (and (eq_attr "type" "fpsimple")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-fp" 5
+  (and (eq_attr "type" "fp,dmul")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-fpcompare" 3
+  (and (eq_attr "type" "fpcompare")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-sdiv" 22
+  (and (eq_attr "type" "sdiv")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10,FP_DIVS_power10")
+
+(define_insn_reservation "power10-ddiv" 27
+  (and (eq_attr "type" "ddiv")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10,FP_DIV_power10")
+
+(define_insn_reservation "power10-sqrt" 26
+  (and (eq_attr "type" "ssqrt")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10,FP_DIVS_power10")
+
+(define_insn_reservation "power10-dsqrt" 36
+  (and (eq_attr "type" "dsqrt")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10,FP_DIV_power10")
+
+(define_insn_reservation "power10-vec-2cyc" 2
+  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-veccmp" 3
+  (and (eq_attr "type" "veccmp")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-vecsimple" 2
+  (and (eq_attr "type" "vecsimple")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-vecnormal" 5
+  (and (eq_attr "type" "vecfloat,vecdouble")
+       (eq_attr "size" "!128")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-qp" 12
+  (and (eq_attr "type" "vecfloat,vecdouble")
+       (eq_attr "size" "128")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-vecperm" 3
+  (and (eq_attr "type" "vecperm")
+       (eq_attr "prefixed" "no")
+       (eq_attr "dot" "no")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-vecperm-compare" 3
+  (and (eq_attr "type" "vecperm")
+       (eq_attr "dot" "yes")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_power10")
+
+(define_insn_reservation "power10-prefixed-vecperm" 3
+  (and (eq_attr "type" "vecperm")
+       (eq_attr "prefixed" "!no")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_power10")
+
+(define_insn_reservation "power10-veccomplex" 6
+  (and (eq_attr "type" "veccomplex")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-vecfdiv" 24
+  (and (eq_attr "type" "vecfdiv")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10,FP_DIVS_power10")
+
+(define_insn_reservation "power10-vecdiv" 27
+  (and (eq_attr "type" "vecdiv")
+       (eq_attr "size" "!128")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10,FP_DIV_power10")
+
+(define_insn_reservation "power10-qpdiv" 56
+  (and (eq_attr "type" "vecdiv")
+       (eq_attr "size" "128")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10,FP_DIV_power10")
+
+(define_insn_reservation "power10-qpmul" 24
+  (and (eq_attr "type" "qmul")
+       (eq_attr "size" "128")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-mtvsr" 2
+  (and (eq_attr "type" "mtvsr")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-mfvsr" 2
+  (and (eq_attr "type" "mfvsr")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+
+; Branch
+; Branch is 2 cycles, grouped with STU for issue
+(define_insn_reservation "power10-branch" 2
+  (and (eq_attr "type" "jmpreg,branch")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,STU_power10")
+
+
+; Crypto
+(define_insn_reservation "power10-crypto" 4
+  (and (eq_attr "type" "crypto")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+
+; HTM
+(define_insn_reservation "power10-htm" 2
+  (and (eq_attr "type" "htmsimple,htm")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+
+; DFP
+; Use the minimum 12 cycle latency for all insns, even though some are more
+(define_insn_reservation "power10-dfp" 12
+  (and (eq_attr "type" "dfp")
+       (eq_attr "size" "!128")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_power10")
+
+(define_insn_reservation "power10-dfpq" 12
+  (and (eq_attr "type" "dfp")
+       (eq_attr "size" "128")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_power10")
+
+; MMA
+(define_insn_reservation "power10-mma" 9
+  (and (eq_attr "type" "mma")
+       (eq_attr "prefixed" "no")
+       (eq_attr "cpu" "power10"))
+  "DU_any_power10,EXU_super_power10")
+
+(define_insn_reservation "power10-prefixed-mma" 9
+  (and (eq_attr "type" "mma")
+       (eq_attr "prefixed" "!no")
+       (eq_attr "cpu" "power10"))
+  "DU_even_power10,EXU_super_power10")
+; 4 cycle MMA->MMA latency
+(define_bypass 4 "power10-mma,power10-prefixed-mma"
+                "power10-mma,power10-prefixed-mma")
+
+

Reply via email to