[PATCH 4/7] sparc: reworked M7 DFA based on instruction subtypes

Jose E. Marchesi Thu, 29 Jun 2017 04:45:07 -0700

This patch reworks the M7 DFA scheduler to use instruction subtypes.  It
also removes the v3pipe insn attribute from sparc.md, as it is no longer
needed.


gcc/ChangeLog:

        * config/sparc/niagara7.md: Rework the DFA scheduler to use insn
        subtypes.
        * config/sparc/sparc.md: Remove the `v3pipe' insn attribute.
        ("*movdi_insn_sp32"): Likewise.
        ("*movsi_insn"): Likewise.
        ("*movdi_insn_sp64"): Likewise.
        ("*movsf_insn"): Likewise.
        ("*movdf_insn_sp32"): Likewise.
        ("*movdf_insn_sp64"): Likewise.
        ("*zero_extendsidi2_insn_sp64"): Likewise.
        ("*sign_extendsidi2_insn"): Likewise.
        ("*mov<VM32:mode>_insn"): Likewise.
        ("*mov<VM64:mode>_insn_sp64"): Likewise.
        ("*mov<VM64:mode>_insn_sp32"): Likewise.
        ("<plusminus_insn><VADDSUB:mode>3"): Likewise.
        ("<vlop:code><VL:mode>3"): Likewise.
        ("*not_<vlop:code><VL:mode>3"): Likewise.
        ("*nand<VL:mode>_vis"): Likewise.
        ("*<vlnotop:code>_not1<VL:mode>_vis"): Likewise.
        ("*<vlnotop:code>_not2<VL:mode>_vis"): Likewise.
        ("one_cmpl<VL:mode>2"): Likewise.
        ("faligndata<VM64:mode>_vis"): Likewise.
        ("alignaddrsi_vis"): Likewise.
        ("alignaddrdi_vis"): Likewise.
        ("alignaddrlsi_vis"): Likewise.
        ("alignaddrldi_vis"): Likewise.
        ("fcmp<gcond:code><GCM:gcm_name><P:mode>_vis"): Likewise.
        ("bmaskdi_vis"): Likewise.
        ("bmasksi_vis"): Likewise.
        ("bshuffle<VM64:mode>_vis"): Likewise.
        ("cmask8<P:mode>_vis"): Likewise.
        ("cmask16<P:mode>_vis"): Likewise.
        ("cmask32<P:mode>_vis"): Likewise.
        ("pdistn<P:mode>_vis"): Likewise.
        ("<vis3_addsub_ss_patname><VASS:mode>3"): Likewise.
---
 gcc/ChangeLog                |  38 +++++++++
 gcc/config/sparc/niagara7.md | 181 ++++++++++++++++++++++++++++++-------------
 gcc/config/sparc/sparc.md    |  93 +++++++---------------
 3 files changed, 192 insertions(+), 120 deletions(-)

diff --git a/gcc/config/sparc/niagara7.md b/gcc/config/sparc/niagara7.md
index 3f46198..23b6707 100644
--- a/gcc/config/sparc/niagara7.md
+++ b/gcc/config/sparc/niagara7.md
@@ -19,64 +19,120 @@
 
 (define_automaton "niagara7_0")
 
-(define_cpu_unit "n7_slot0,n7_slot1,n7_slot2" "niagara7_0")
-(define_reservation "n7_single_issue" "n7_slot0 + n7_slot1 + n7_slot2")
+;; The S4 core has a dual-issue queue.  This queue is divided into two
+;; slots.  One instruction can be issued each cycle to each slot, and
+;; up to 2 instructions are committed each cycle.  Each slot serves
+;; several execution units, as depicted below:
+;;
+;;
+;;                 n7_slot0 - Integer unit.
+;;                          - Load/Store unit.
+;; === QUEUE ==>
+;;
+;;                 n7_slot1 - Integer unit.
+;;                          - Branch unit.
+;;                          - Floating-point and graphics unit.
+;;                          - 3-cycle crypto unit.
 
-(define_cpu_unit "n7_load_store" "niagara7_0")
+(define_cpu_unit "n7_slot0,n7_slot1" "niagara7_0")
+
+;; Some instructions stall the pipeline and prevent any other
+;; instruction from being issued in the same cycle.  We assume the
+;; same for multi-instruction insns.
+
+(define_reservation "n7_single_issue" "n7_slot0 + n7_slot1")
 
 (define_insn_reservation "n7_single" 1
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "multi,savew,flushw,trap"))
   "n7_single_issue")
 
-(define_insn_reservation "n7_iflush" 27
-  (and (eq_attr "cpu" "niagara7")
-       (eq_attr "type" "iflush"))
-  "(n7_slot0 | n7_slot1), nothing*26")
+;; Most of the instructions executing in the integer unit have a
+;; latency of 1.
 
 (define_insn_reservation "n7_integer" 1
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "ialu,ialuX,shift,cmove,compare"))
   "(n7_slot0 | n7_slot1)")
 
+;; Flushing the instruction memory takes 27 cycles.
+
+(define_insn_reservation "n7_iflush" 27
+  (and (eq_attr "cpu" "niagara7")
+       (eq_attr "type" "iflush"))
+  "(n7_slot0 | n7_slot1), nothing*26")
+
+;; The integer multiplication instructions have a latency of 12 cycles
+;; and execute in the integer unit.
+;;
+;; Likewise for array*, edge* and pdistn instructions.
+
 (define_insn_reservation "n7_imul" 12
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "imul"))
-  "n7_slot1, nothing*11")
+    (eq_attr "type" "imul,array,edge,edgen,pdistn"))
+  "(n7_slot0 | n7_slot1), nothing*11")
+
+;; The integer division instructions have a latency of 35 cycles and
+;; execute in the integer unit.
 
 (define_insn_reservation "n7_idiv" 35
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "idiv"))
-  "n7_slot1, nothing*34")
+  "(n7_slot0 | n7_slot1), nothing*34")
+
+;; Both integer and floating-point load instructions have a latency of
+;; 5 cycles, and execute in the slot0.
+;;
+;; The prefetch instruction also executes in the load/store unit, but
+;; its latency is only 1 cycle.
 
 (define_insn_reservation "n7_load" 5
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "load,fpload,sload"))
-  "(n7_slot0 + n7_load_store), nothing*4")
+       (ior (eq_attr "type" "fpload,sload")
+            (and (eq_attr "type" "load")
+                 (eq_attr "subtype" "regular"))))
+  "n7_slot0, nothing*4")
+
+(define_insn_reservation "n7_prefetch" 1
+  (and (eq_attr "cpu" "niagara7")
+       (eq_attr "type" "load")
+       (eq_attr "subtype" "prefetch"))
+  "n7_slot0")
+
+;; Both integer and floating-point store instructions have a latency
+;; of 1 cycle, and execute in the load/store unit in slot0.
 
 (define_insn_reservation "n7_store" 1
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" "store,fpstore"))
-  "(n7_slot0 | n7_slot2) + n7_load_store")
+  "n7_slot0")
+
+;; Control-transfer instructions execute in the Branch Unit in the
+;; slot1.
 
 (define_insn_reservation "n7_cti" 1
   (and (eq_attr "cpu" "niagara7")
     (eq_attr "type" 
"cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
   "n7_slot1")
 
+;; Many instructions executing in the Floating-point and Graphics unit
+;; in the slot1 feature a latency of 11 cycles.
+
 (define_insn_reservation "n7_fp" 11
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul"))
+       (ior (eq_attr "type" 
"fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")
+            (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "fpu,maxmin"))))
   "n7_slot1, nothing*10")
 
-(define_insn_reservation "n7_array" 12
-  (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "array,bmask,edge,edgen"))
-  "n7_slot1, nothing*11")
+;; Floating-point division and floating-point square-root instructions
+;; have high latencies.  They execute in the floating-point and
+;; graphics unit in the slot1.
+
 
 (define_insn_reservation "n7_fpdivs" 24
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "fpdivs,fpsqrts"))
+       (eq_attr "type" "fpdivs,fpsqrts"))
   "n7_slot1, nothing*23")
 
 (define_insn_reservation "n7_fpdivd" 37
@@ -84,53 +140,66 @@
     (eq_attr "type" "fpdivd,fpsqrtd"))
   "n7_slot1, nothing*36")
 
-(define_insn_reservation "n7_lzd" 12
-  (and (eq_attr "cpu" "niagara7")
-       (eq_attr "type" "lzd"))
-  "(n7_slot0 | n7_slot1), nothing*11")
-
-;; There is an internal unit called the "V3 pipe", that was originally
-;; intended to process some of the short cryptographic instructions.
-;; However, as soon as in the T4 several of the VIS instructions
-;; (notably non-FP instructions) have been moved to the V3 pipe.
-;; Consequently, these instructions feature a latency of 3 instead of
-;; 11 or 12 cycles, provided their consumers also execute in the V3
-;; pipe.
+;; SIMD VIS instructions executing in the Floating-point and graphics
+;; unit (FPG) in slot1 usually have a latency of either 11 or 12
+;; cycles.
 ;;
-;; This is modelled here with a bypass.
+;; However, the latency for many instructions is only 3 cycles if the
+;; consumer can also be executed in 3 cycles.  We model this with a
+;; bypass.  In these cases the instructions are executed in the
+;; 3-cycle crypto unit which also serves slot1.
+
+(define_insn_reservation "n7_vis_11cycles" 11
+  (and (eq_attr "cpu" "niagara7")
+       (ior (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "addsub64,other"))
+            (and (eq_attr "type" "vismv")
+                 (eq_attr "subtype" "double,single"))
+            (and (eq_attr "type" "visl")
+                 (eq_attr "subtype" "double,single"))))
+  "n7_slot1, nothing*10")
 
-(define_insn_reservation "n7_vis_fga" 11
+(define_insn_reservation "n7_vis_12cycles" 12
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "fga,gsr"))
-  "n7_slot1, nothing*10")
+       (ior (eq_attr "type" "bmask,viscmp")
+            (and (eq_attr "type" "fga")
+                 (eq_attr "subtype" "cmask"))
+            (and (eq_attr "type" "vismv")
+                 (eq_attr "subtype" "movstouw"))))
+  "n7_slot1, nothing*11")
+
+(define_bypass 3 "n7_vis_*" "n7_vis_*")
+
+;; Some other VIS instructions have a latency of 12 cycles, and won't
+;; be executed in the 3-cycle crypto pipe.
 
-(define_insn_reservation "n7_vis_fgm" 11
+(define_insn_reservation "n7_lzd" 12
   (and (eq_attr "cpu" "niagara7")
-    (eq_attr "type" "fgm_pack,fgm_mul,pdist"))
-  "n7_slot1, nothing*10")
+       (ior (eq_attr "type" "lzd,")
+            (and (eq_attr "type" "gsr")
+                 (eq_attr "subtype" "alignaddr"))))
+  "n7_slot1, nothing*11")
 
-(define_insn_reservation "n7_vis_move_v3pipe" 11
+;; A couple of VIS instructions feature very low latencies in the M7.
+
+(define_insn_reservation "n7_single_vis" 1
   (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "vismv")
-         (eq_attr "v3pipe" "true")))
+       (eq_attr "type" "vismv")
+       (eq_attr "subtype" "movxtod"))
   "n7_slot1")
 
-(define_insn_reservation "n7_vis_move_11cycle" 11
+(define_insn_reservation "n7_double_vis" 2
   (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "vismv")
-         (eq_attr "v3pipe" "false")))
-  "n7_slot1, nothing*10")
+       (eq_attr "type" "vismv")
+       (eq_attr "subtype" "movdtox"))
+  "n7_slot1, nothing")
 
-(define_insn_reservation "n7_vis_logical_v3pipe" 11
-  (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "visl,viscmp,pdistn")
-         (eq_attr "v3pipe" "true")))
-  "n7_slot1, nothing*2")
+;; Reading and writing to the gsr register takes a high number of
+;; cycles that is not documented in the PRM.  Let's use the same value
+;; as the M8.
 
-(define_insn_reservation "n7_vis_logical_11cycle" 11
+(define_insn_reservation "n7_gsr_reg" 70
   (and (eq_attr "cpu" "niagara7")
-    (and (eq_attr "type" "visl,viscmp")
-      (eq_attr "v3pipe" "false")))
-  "n7_slot1, nothing*10")
-
-(define_bypass 3 "n7*_v3pipe" "n7_*_v3pipe")
+       (eq_attr "type" "gsr")
+       (eq_attr "subtype" "reg"))
+  "n7_slot1, nothing*70")
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index d1bf6a7..b550f037 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -574,9 +574,6 @@
           (const_string "true")
        ] (const_string "false")))
 
-;; True if the instruction executes in the V3 pipeline, in M7 and later processors.
-(define_attr "v3pipe" "false,true" (const_string "false"))
-
 (define_delay (eq_attr "type" "call")
   [(eq_attr "in_call_delay" "true") (nil) (nil)])
 
@@ -1656,8 +1653,7 @@
    fones\t%0"
   [(set_attr "type" 
"*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
    (set_attr "subtype" "*,*,regular,*,movstouw,single,*,*,*,single,single")
-   (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")
-   (set_attr "v3pipe" "*,*,*,*,true,true,*,*,*,true,true")])
+   (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
 
 (define_insn "*movsi_lo_sum"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -1830,7 +1826,6 @@ visl")
    (set_attr "length" "*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
    (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
    (set_attr "cpu_feature" 
"v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
-   (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,true,true")
    (set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
 
 (define_insn "*movdi_insn_sp64"
@@ -1854,8 +1849,7 @@ visl")
   [(set_attr "type" 
"*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
    (set_attr "subtype" "*,*,regular,*,movdtox,movxtod,*,*,*,double,double")
    (set_attr "fptype" "*,*,*,*,*,*,double,*,*,double,double")
-   (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")
-   (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,true,true")])
+   (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
 
 (define_expand "movdi_pic_label_ref"
   [(set (match_dup 3) (high:DI
@@ -2385,8 +2379,7 @@ visl")
 }
   [(set_attr "type" 
"visl,visl,fpmove,*,*,*,vismv,vismv,fpload,load,fpstore,store")
    (set_attr "subtype" "single,single,*,*,*,*,movstouw,single,*,regular,*,*")
-   (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")
-   (set_attr "v3pipe" "true,true,*,*,*,*,true,true,*,*,*,*")])
+   (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")])
 
 ;; The following 3 patterns build SFmode constants in integer registers.
 
@@ -2462,7 +2455,6 @@ visl")
    (set_attr "length" "*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
    (set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "cpu_feature" 
"v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
-   (set_attr "v3pipe" "*,*,true,true,*,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
 
 (define_insn "*movdf_insn_sp64"
@@ -2487,8 +2479,7 @@ visl")
    (set_attr "subtype" 
"double,double,*,movdtox,movxtod,regular,*,*,regular,*,*")
    (set_attr "length" "*,*,*,*,*,*,*,*,*,*,2")
    (set_attr "fptype" "double,double,double,double,double,*,*,*,*,*,*")
-   (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")
-   (set_attr "v3pipe" "true,true,*,*,*,*,*,*,*,*,*")])
+   (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")])
 
 ;; This pattern builds DFmode constants in integer registers.
 (define_split
@@ -3119,8 +3110,7 @@ visl")
    movstouw\t%1, %0"
   [(set_attr "type" "shift,load,vismv")
    (set_attr "subtype" "*,regular,movstouw")
-   (set_attr "cpu_feature" "*,*,vis3")
-   (set_attr "v3pipe" "*,*,true")])
+   (set_attr "cpu_feature" "*,*,vis3")])
 
 (define_insn_and_split "*zero_extendsidi2_insn_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -3435,8 +3425,7 @@ visl")
   movstosw\t%1, %0"
   [(set_attr "type" "shift,sload,vismv")
    (set_attr "us3load_type" "*,3cycle,*")
-   (set_attr "cpu_feature" "*,*,vis3")
-   (set_attr "v3pipe" "*,*,true")])
+   (set_attr "cpu_feature" "*,*,vis3")])
 
 
 ;; Special pattern for optimizing bit-field compares.  This is needed
@@ -8645,8 +8634,7 @@ visl")
   movwtos\t%1, %0"
   [(set_attr "type" 
"visl,visl,vismv,fpload,fpstore,store,load,store,*,vismv,vismv")
    (set_attr "subtype" 
"single,single,single,*,*,*,regular,*,*,movstouw,single")
-   (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")
-   (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,true,true")])
+   (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")])
 
 (define_insn "*mov<VM64:mode>_insn_sp64"
   [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,W,m,*r, m,*r, 
e,*r")
@@ -8669,8 +8657,7 @@ visl")
   mov\t%1, %0"
   [(set_attr "type" 
"visl,visl,vismv,fpload,fpstore,store,load,store,vismv,vismv,*")
    (set_attr "subtype" 
"double,double,double,*,*,*,regular,*,movdtox,movxtod,*")
-   (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")
-   (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*")])
+   (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")])
 
 (define_insn "*mov<VM64:mode>_insn_sp32"
   [(set (match_operand:VM64 0 "nonimmediate_operand"
@@ -8702,7 +8689,6 @@ visl")
    (set_attr "subtype" 
"*,*,double,double,double,*,*,*,*,regular,*,*,*,*,regular,*")
    (set_attr "length" "*,2,*,*,*,2,2,*,*,*,*,2,2,2,*,*")
    (set_attr "cpu_feature" "*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
-   (set_attr "v3pipe" "*,*,true,true,true,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "lra" "*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
 
 (define_split
@@ -8781,8 +8767,7 @@ visl")
   "fp<plusminus_insn><vbits>\t%1, %2, %0"
   [(set_attr "type" "fga")
    (set_attr "subtype" "other")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_mode_iterator VL [V1SI V2HI V4QI V1DI V2SI V4HI V8QI])
 (define_mode_attr vlsuf [(V1SI "s") (V2HI "s") (V4QI "s")
@@ -8798,8 +8783,7 @@ visl")
   "TARGET_VIS"
   "f<vlinsn><vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_insn "*not_<vlop:code><VL:mode>3"
   [(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8808,8 +8792,7 @@ visl")
   "TARGET_VIS"
   "f<vlninsn><vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 ;; (ior (not (op1)) (not (op2))) is the canonical form of NAND.
 (define_insn "*nand<VL:mode>_vis"
@@ -8819,8 +8802,7 @@ visl")
   "TARGET_VIS"
   "fnand<vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_code_iterator vlnotop [ior and])
 
@@ -8831,8 +8813,7 @@ visl")
   "TARGET_VIS"
   "f<vlinsn>not1<vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_insn "*<vlnotop:code>_not2<VL:mode>_vis"
   [(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8841,8 +8822,7 @@ visl")
   "TARGET_VIS"
   "f<vlinsn>not2<vlsuf>\t%1, %2, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 (define_insn "one_cmpl<VL:mode>2"
   [(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8850,8 +8830,7 @@ visl")
   "TARGET_VIS"
   "fnot1<vlsuf>\t%1, %0"
   [(set_attr "type" "visl")
-   (set_attr "fptype" "<vfptype>")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "<vfptype>")])
 
 ;; Hard to generate VIS instructions.  We have builtins for these.
 
@@ -9054,8 +9033,7 @@ visl")
   "faligndata\t%1, %2, %0"
   [(set_attr "type" "fga")
    (set_attr "subtype" "other")
-   (set_attr "fptype" "double")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "double")])
 
 (define_insn "alignaddrsi_vis"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -9066,8 +9044,7 @@ visl")
   "TARGET_VIS"
   "alignaddr\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
-   (set_attr "subtype" "alignaddr")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "alignaddr")])
 
 (define_insn "alignaddrdi_vis"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -9078,8 +9055,7 @@ visl")
   "TARGET_VIS"
   "alignaddr\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
-   (set_attr "subtype" "alignaddr")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "alignaddr")])
 
 (define_insn "alignaddrlsi_vis"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -9091,8 +9067,7 @@ visl")
   "TARGET_VIS"
   "alignaddrl\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
-   (set_attr "subtype" "alignaddr")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "alignaddr")])
 
 (define_insn "alignaddrldi_vis"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -9104,8 +9079,7 @@ visl")
   "TARGET_VIS"
   "alignaddrl\t%r1, %r2, %0"
   [(set_attr "type" "gsr")
-   (set_attr "subtype" "alignaddr")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "alignaddr")])
 
 (define_insn "pdist_vis"
   [(set (match_operand:DI 0 "register_operand" "=e")
@@ -9197,8 +9171,7 @@ visl")
         UNSPEC_FCMP))]
   "TARGET_VIS"
   "fcmp<gcond:code><GCM:gcm_name>\t%1, %2, %0"
-  [(set_attr "type" "viscmp")
-   (set_attr "v3pipe" "true")])
+  [(set_attr "type" "viscmp")])
 
 (define_insn "fpcmp<gcond:code>8<P:mode>_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
@@ -9270,8 +9243,7 @@ visl")
         (plus:DI (match_dup 1) (match_dup 2)))]
   "TARGET_VIS2 && TARGET_ARCH64"
   "bmask\t%r1, %r2, %0"
-  [(set_attr "type" "bmask")
-   (set_attr "v3pipe" "true")])
+  [(set_attr "type" "bmask")])
 
 (define_insn "bmasksi_vis"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -9281,8 +9253,7 @@ visl")
         (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_VIS2"
   "bmask\t%r1, %r2, %0"
-  [(set_attr "type" "bmask")
-   (set_attr "v3pipe" "true")])
+  [(set_attr "type" "bmask")])
 
 (define_insn "bshuffle<VM64:mode>_vis"
   [(set (match_operand:VM64 0 "register_operand" "=e")
@@ -9294,8 +9265,7 @@ visl")
   "bshuffle\t%1, %2, %0"
   [(set_attr "type" "fga")
    (set_attr "subtype" "other")
-   (set_attr "fptype" "double")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "double")])
 
 ;; The rtl expanders will happily convert constant permutations on other
 ;; modes down to V8QI.  Rely on this to avoid the complexity of the byte
@@ -9398,8 +9368,7 @@ visl")
   "TARGET_VIS3"
   "cmask8\t%r0"
   [(set_attr "type" "fga")
-   (set_attr "subtype" "cmask")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "cmask")])
 
 (define_insn "cmask16<P:mode>_vis"
   [(set (reg:DI GSR_REG)
@@ -9409,8 +9378,7 @@ visl")
   "TARGET_VIS3"
   "cmask16\t%r0"
   [(set_attr "type" "fga")
-   (set_attr "subtype" "cmask")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "cmask")])
 
 (define_insn "cmask32<P:mode>_vis"
   [(set (reg:DI GSR_REG)
@@ -9420,8 +9388,7 @@ visl")
   "TARGET_VIS3"
   "cmask32\t%r0"
   [(set_attr "type" "fga")
-   (set_attr "subtype" "cmask")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "cmask")])
 
 (define_insn "fchksm16_vis"
   [(set (match_operand:V4HI 0 "register_operand" "=e")
@@ -9456,8 +9423,7 @@ visl")
   "TARGET_VIS3"
   "pdistn\t%1, %2, %0"
   [(set_attr "type" "pdistn")
-   (set_attr "fptype" "double")
-   (set_attr "v3pipe" "true")])
+   (set_attr "fptype" "double")])
 
 (define_insn "fmean16_vis"
   [(set (match_operand:V4HI 0 "register_operand" "=e")
@@ -9509,8 +9475,7 @@ visl")
   "TARGET_VIS3"
   "<vis3_addsub_ss_insn><vbits>\t%1, %2, %0"
   [(set_attr "type" "fga")
-   (set_attr "subtype" "other")
-   (set_attr "v3pipe" "true")])
+   (set_attr "subtype" "other")])
 
 (define_mode_iterator VMMAX [V8QI V4HI V2SI])
 (define_code_iterator vis4_minmax [smin smax])
-- 
2.3.4

[PATCH 4/7] sparc: reworked M7 DFA based on instruction subtypes

Reply via email to