Hi gcc-patches group,

I am working on adding vulcan.md (a machine description) for the vulcan cpu to the aarch64 port. Before I propose the final patch, I would like you all to review the basic approach, since it also changes aarch64.md.
On vulcan, a load/store instruction can be scheduled onto cpu units in different ways depending on its addressing mode (e.g. the load/store pipe alone, or an integer pipe plus the load/store pipe). So the requirement is to identify the addressing mode of a load/store instruction's memory operand while scheduling. For this purpose, a new attribute "addr_type" has been added in the aarch64.md file; it identifies which operand of a load/store instruction should be considered for finding the addressing mode. While scheduling, vulcan.md calls a new function aarch64_mem_type_p in aarch64.c (via match_test) to decide the scheduling option based on the addressing mode.
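To make the intended use concrete, here is a minimal sketch of the idea. It is illustrative only: the core "foo", its tune value, and its cpu units are hypothetical and not part of the patch.

;; Illustrative sketch, not part of the patch.  A hypothetical core "foo"
;; with one integer pipe (foo_i) and one load/store pipe (foo_ls), where
;; an index-register address costs an extra integer uop and an extra cycle.
(define_automaton "foo")
(define_cpu_unit "foo_i" "foo")
(define_cpu_unit "foo_ls" "foo")

;; Immediate-offset loads, e.g. ldr x0, [x1, #16].
(define_insn_reservation "foo_load_basic" 4
  (and (eq_attr "tune" "foo")
       (eq_attr "type" "load1")
       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM)"))
  "foo_ls")

;; Index-register loads, e.g. ldr x0, [x1, x2] or ldr x0, [x1, x2, lsl #3].
(define_insn_reservation "foo_load_index" 5
  (and (eq_attr "tune" "foo")
       (eq_attr "type" "load1")
       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
                                              | AARCH64_ADDR_REG_SHIFT)"))
  "foo_i,foo_ls")

Roughly, the mask bits correspond to the usual assembly address forms as follows: AARCH64_ADDR_REG_IMM is [x1, #16]; AARCH64_ADDR_REG_WB covers the writeback forms [x1], #8 and [x1, #8]!; AARCH64_ADDR_REG_REG and AARCH64_ADDR_REG_SHIFT are [x1, x2] and [x1, x2, lsl #3]; AARCH64_ADDR_REG_EXT and AARCH64_ADDR_REG_SHIFT_EXT are the extended-index variants such as [x1, w2, sxtw]; AARCH64_ADDR_LO_SUM is the low-part address of an adrp pair, e.g. [x1, #:lo12:sym]; and AARCH64_ADDR_SYMBOLIC is a pc-relative (literal) load.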
I have copied the relevant code snippet below (the complete patch is attached to this mail). Kindly review it and give your feedback/comments. Also, if you can think of a better alternative, kindly suggest it. Thanks in advance for your time.

<Code Snippet>

FILE - gcc/config/aarch64/aarch64-protos.h

/* Mask bits to use for aarch64_mem_type_p.  Unshifted/shifted index
   register variants are separated for scheduling purposes because the
   distinction matters on some cores.  */

#define AARCH64_ADDR_REG_IMM       0x01
#define AARCH64_ADDR_REG_WB        0x02
#define AARCH64_ADDR_REG_REG       0x04
#define AARCH64_ADDR_REG_SHIFT     0x08
#define AARCH64_ADDR_REG_EXT       0x10
#define AARCH64_ADDR_REG_SHIFT_EXT 0x20
#define AARCH64_ADDR_LO_SUM        0x40
#define AARCH64_ADDR_SYMBOLIC      0x80

FILE - gcc/config/aarch64/aarch64.md

(define_attr "addr_type" "none,op0,op1,op0addr,op1addr,lo_sum,wb"
  (const_string "none"))

(define_insn "*mov<mode>_aarch64"
  [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w")
        (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))]
  "(register_operand (operands[0], <MODE>mode)
    || aarch64_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0:
       return "mov\t%w0, %w1";
     case 1:
       return "mov\t%w0, %1";
     case 2:
       return aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
     case 3:
       return "ldr<size>\t%w0, %1";
     case 4:
       return "ldr\t%<size>0, %1";
     case 5:
       return "str<size>\t%w1, %0";
     case 6:
       return "str\t%<size>1, %0";
     case 7:
       return "umov\t%w0, %1.<v>[0]";
     case 8:
       return "dup\t%0.<Vallxd>, %w1";
     case 9:
       return "dup\t%<Vetype>0, %1.<v>[0]";
     default:
       gcc_unreachable ();
     }
}
  [(set_attr "type" "mov_reg,mov_imm,neon_move,load1,load1,store1,store1,\
                     neon_to_gp<q>,neon_from_gp<q>,neon_dup")
   (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
   (set_attr "addr_type" "*,*,*,op1,op1,op0,op0,*,*,*")]
)

FILE - gcc/config/aarch64/vulcan.md

;; Integer loads and stores.

(define_insn_reservation "vulcan_load_basic" 4
  (and (eq_attr "tune" "vulcan")
       (eq_attr "type" "load1")
       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
                                              | AARCH64_ADDR_REG_IMM
                                              | AARCH64_ADDR_LO_SUM)"))
  "vulcan_ls01")

(define_insn_reservation "vulcan_load_automod" 4
  (and (eq_attr "tune" "vulcan")
       (eq_attr "type" "load1")
       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
  "vulcan_ls01,vulcan_i012")

FILE - gcc/config/aarch64/aarch64.c

/* Return TRUE if INSN uses an address that satisfies any of the (non-strict)
   addressing modes specified by MASK.  This is intended for use in scheduling
   models that are sensitive to the form of address used by some particular
   instruction.  */

bool
aarch64_mem_type_p (rtx_insn *insn, unsigned HOST_WIDE_INT mask)
{
  aarch64_address_info info;
  bool valid;
  attr_addr_type addr_type;
  rtx mem, addr;
  machine_mode mode;

  addr_type = get_attr_addr_type (insn);

  switch (addr_type)
    {
    case ADDR_TYPE_WB:
      info.type = ADDRESS_REG_WB;
      break;

    case ADDR_TYPE_LO_SUM:
      info.type = ADDRESS_LO_SUM;
      break;

    case ADDR_TYPE_OP0:
    case ADDR_TYPE_OP1:
      extract_insn_cached (insn);
      mem = recog_data.operand[(addr_type == ADDR_TYPE_OP0) ? 0 : 1];
      gcc_assert (MEM_P (mem));
      addr = XEXP (mem, 0);
      mode = GET_MODE (mem);

    classify:
      valid = aarch64_classify_address (&info, addr, mode, MEM, false);
      if (!valid)
        return false;
      break;

    case ADDR_TYPE_OP0ADDR:
    case ADDR_TYPE_OP1ADDR:
      extract_insn_cached (insn);
      addr = recog_data.operand[(addr_type == ADDR_TYPE_OP0ADDR) ? 0 : 1];
      mode = DImode;
      goto classify;

    case ADDR_TYPE_NONE:
      return false;
    }

  switch (info.type)
    {
    case ADDRESS_REG_IMM:
      return (mask & AARCH64_ADDR_REG_IMM) != 0;
    case ADDRESS_REG_WB:
      return (mask & AARCH64_ADDR_REG_WB) != 0;
    case ADDRESS_REG_REG:
      if (info.shift == 0)
        return (mask & AARCH64_ADDR_REG_REG) != 0;
      else
        return (mask & AARCH64_ADDR_REG_SHIFT) != 0;
    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      if (info.shift == 0)
        return (mask & AARCH64_ADDR_REG_EXT) != 0;
      else
        return (mask & AARCH64_ADDR_REG_SHIFT_EXT) != 0;
    case ADDRESS_LO_SUM:
      return (mask & AARCH64_ADDR_LO_SUM) != 0;
    case ADDRESS_SYMBOLIC:
      return (mask & AARCH64_ADDR_SYMBOLIC) != 0;
    default:
      return false;
    }
}

<END>

with regards,
Virendra Pathak
From 4687f79a01334c15b20f0191811d58a93e5dfbae Mon Sep 17 00:00:00 2001
From: Virendra Pathak <virendra.pat...@broadcom.com>
Date: Wed, 29 Jun 2016 05:04:38 -0700
Subject: [PATCH] AArch64: add scheduler for vulcan cpu

---
 gcc/config/aarch64/aarch64-cores.def |   2 +-
 gcc/config/aarch64/aarch64-protos.h  |  14 +
 gcc/config/aarch64/aarch64.c         |  82 +++++
 gcc/config/aarch64/aarch64.md        |  91 +++--
 gcc/config/aarch64/vulcan.md         | 619 +++++++++++++++++++++++++++++++++++
 5 files changed, 778 insertions(+), 30 deletions(-)
 create mode 100644 gcc/config/aarch64/vulcan.md

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index f29d25a..55d2514 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -51,7 +51,7 @@ AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xge
 
 /* V8.1 Architecture Processors.  */
 
-AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
+AARCH64_CORE("vulcan", vulcan, vulcan, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
 
 /* V8 big.LITTLE implementations.  */
 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e8c2ac8..43e21e2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -240,6 +240,19 @@ struct tune_params
   unsigned int extra_tuning_flags;
 };
 
+/* Mask bits to use for aarch64_mem_type_p.  Unshifted/shifted index
+   register variants are separated for scheduling purposes because the
+   distinction matters on some cores.  */
+
+#define AARCH64_ADDR_REG_IMM       0x01
+#define AARCH64_ADDR_REG_WB        0x02
+#define AARCH64_ADDR_REG_REG       0x04
+#define AARCH64_ADDR_REG_SHIFT     0x08
+#define AARCH64_ADDR_REG_EXT       0x10
+#define AARCH64_ADDR_REG_SHIFT_EXT 0x20
+#define AARCH64_ADDR_LO_SUM        0x40
+#define AARCH64_ADDR_SYMBOLIC      0x80
+
 #define AARCH64_FUSION_PAIR(x, name) \
   AARCH64_FUSE_##name##_index,
 /* Supported fusion operations.  */
@@ -341,6 +354,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
 bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
 			struct simd_immediate_info *);
+bool aarch64_mem_type_p (rtx_insn *, unsigned HOST_WIDE_INT);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
 bool aarch64_use_return_insn_p (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7eb754..806d028 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4192,6 +4192,88 @@ aarch64_classify_address (struct aarch64_address_info *info,
     }
 }
 
+/* Return TRUE if INSN uses an address that satisfies any of the (non-strict)
+   addressing modes specified by MASK.  This is intended for use in scheduling
+   models that are sensitive to the form of address used by some particular
+   instruction.  */
+
+bool
+aarch64_mem_type_p (rtx_insn *insn, unsigned HOST_WIDE_INT mask)
+{
+  aarch64_address_info info;
+  bool valid;
+  attr_addr_type addr_type;
+  rtx mem, addr;
+  machine_mode mode;
+
+  addr_type = get_attr_addr_type (insn);
+
+  switch (addr_type)
+    {
+    case ADDR_TYPE_WB:
+      info.type = ADDRESS_REG_WB;
+      break;
+
+    case ADDR_TYPE_LO_SUM:
+      info.type = ADDRESS_LO_SUM;
+      break;
+
+    case ADDR_TYPE_OP0:
+    case ADDR_TYPE_OP1:
+      extract_insn_cached (insn);
+
+      mem = recog_data.operand[(addr_type == ADDR_TYPE_OP0) ? 0 : 1];
+
+      gcc_assert (MEM_P (mem));
+
+      addr = XEXP (mem, 0);
+      mode = GET_MODE (mem);
+
+    classify:
+      valid = aarch64_classify_address (&info, addr, mode, MEM, false);
+      if (!valid)
+        return false;
+
+      break;
+
+    case ADDR_TYPE_OP0ADDR:
+    case ADDR_TYPE_OP1ADDR:
+      extract_insn_cached (insn);
+
+      addr = recog_data.operand[(addr_type == ADDR_TYPE_OP0ADDR) ? 0 : 1];
+      mode = DImode;
+      goto classify;
+
+    case ADDR_TYPE_NONE:
+      return false;
+    }
+
+  switch (info.type)
+    {
+    case ADDRESS_REG_IMM:
+      return (mask & AARCH64_ADDR_REG_IMM) != 0;
+    case ADDRESS_REG_WB:
+      return (mask & AARCH64_ADDR_REG_WB) != 0;
+    case ADDRESS_REG_REG:
+      if (info.shift == 0)
+        return (mask & AARCH64_ADDR_REG_REG) != 0;
+      else
+        return (mask & AARCH64_ADDR_REG_SHIFT) != 0;
+    case ADDRESS_REG_UXTW:
+    case ADDRESS_REG_SXTW:
+      if (info.shift == 0)
+        return (mask & AARCH64_ADDR_REG_EXT) != 0;
+      else
+        return (mask & AARCH64_ADDR_REG_SHIFT_EXT) != 0;
+    case ADDRESS_LO_SUM:
+      return (mask & AARCH64_ADDR_LO_SUM) != 0;
+    case ADDRESS_SYMBOLIC:
+      return (mask & AARCH64_ADDR_SYMBOLIC) != 0;
+    default:
+      return false;
+    }
+}
+
 bool
 aarch64_symbolic_address_p (rtx x)
 {
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index bcb7db0..2fa2a89 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -206,6 +206,9 @@
 ;; no predicated insns.
 (define_attr "predicated" "yes,no" (const_string "no"))
 
+(define_attr "addr_type" "none,op0,op1,op0addr,op1addr,lo_sum,wb"
+  (const_string "none"))
+
 ;; -------------------------------------------------------------------
 ;; Pipeline descriptions and scheduling
 ;; -------------------------------------------------------------------
@@ -219,6 +222,7 @@
 (include "../arm/exynos-m1.md")
 (include "thunderx.md")
 (include "../arm/xgene1.md")
+(include "vulcan.md")
 
 ;; -------------------------------------------------------------------
 ;; Jumps and other miscellaneous insns
@@ -535,7 +539,8 @@
     return pftype[INTVAL(operands[1])][locality];
   }
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load1")
+   (set_attr "addr_type" "op0addr")]
 )
 
 (define_insn "trap"
@@ -1017,7 +1022,8 @@
 }
   [(set_attr "type" "mov_reg,mov_imm,neon_move,load1,load1,store1,store1,\
                      neon_to_gp<q>,neon_from_gp<q>,neon_dup")
-   (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")]
+   (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
+   (set_attr "addr_type" "*,*,*,op1,op1,op0,op0,*,*,*")]
 )
 
 (define_expand "mov<mode>"
@@ -1068,7 +1074,8 @@
 }"
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov")
-   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")
+   (set_attr "addr_type" "*,*,*,*,*,op1,op1,op0,op0,*,*,*,*,*")]
 )
 
 (define_insn_and_split "*movdi_aarch64"
@@ -1102,7 +1109,8 @@
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
                      adr,adr,f_mcr,f_mrc,fmov,neon_move")
    (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
-   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+   (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")
+   (set_attr "addr_type" "*,*,*,*,*,op1,op1,op0,op0,*,*,*,*,*,*")]
 )
 
 (define_insn "insv_imm<mode>"
@@ -1147,7 +1155,8 @@
                      load2,store2,store2,f_loadd,f_stored")
    (set_attr "length" "8,8,8,4,4,4,4,4,4")
    (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
-   (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
+   (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")
+   (set_attr "addr_type" "*,*,*,*,op1,op0,op0,op1,op0")]
 )
 
 ;; Split a TImode register-register or register-immediate move into
@@ -1219,7 +1228,8 @@
    mov\\t%w0, %w1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
                      f_loads,f_stores,load1,store1,mov_reg")
-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
+   (set_attr "addr_type" "*,*,*,*,*,op1,op0,op1,op0,*")]
 )
 
 (define_insn "*movdf_aarch64"
@@ -1240,7 +1250,8 @@
    mov\\t%x0, %x1"
   [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
                      f_loadd,f_stored,load1,store1,mov_reg")
-   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+   (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
+   (set_attr "addr_type" "*,*,*,*,*,op1,op0,op1,op0,*")]
 )
 
 (define_insn "*movtf_aarch64"
@@ -1265,7 +1276,8 @@
   [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
                      f_loadd,f_stored,load2,store2,store2")
    (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
-   (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
+   (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")
+   (set_attr "addr_type" "*,*,*,*,*,*,op1,op0,op1,op0,op0")]
 )
 
 (define_split
@@ -1312,7 +1324,8 @@
    ldp\\t%w0, %w2, %1
    ldp\\t%s0, %s2, %1"
   [(set_attr "type" "load2,neon_load1_2reg")
-   (set_attr "fp" "*,yes")]
+   (set_attr "fp" "*,yes")
+   (set_attr "addr_type" "op1")]
 )
 
 (define_insn "load_pairdi"
@@ -1328,7 +1341,8 @@
    ldp\\t%x0, %x2, %1
    ldp\\t%d0, %d2, %1"
   [(set_attr "type" "load2,neon_load1_2reg")
-   (set_attr "fp" "*,yes")]
+   (set_attr "fp" "*,yes")
+   (set_attr "addr_type" "op1")]
 )
 
 
@@ -1347,7 +1361,8 @@
    stp\\t%w1, %w3, %0
    stp\\t%s1, %s3, %0"
   [(set_attr "type" "store2,neon_store1_2reg")
-   (set_attr "fp" "*,yes")]
+   (set_attr "fp" "*,yes")
+   (set_attr "addr_type" "op0")]
 )
 
 (define_insn "store_pairdi"
@@ -1363,7 +1378,8 @@
    stp\\t%x1, %x3, %0
    stp\\t%d1, %d3, %0"
   [(set_attr "type" "store2,neon_store1_2reg")
-   (set_attr "fp" "*,yes")]
+   (set_attr "fp" "*,yes")
+   (set_attr "addr_type" "op0")]
 )
 
 ;; Operands 1 and 3 are tied together by the final condition; so we allow
@@ -1381,7 +1397,8 @@
    ldp\\t%s0, %s2, %1
    ldp\\t%w0, %w2, %1"
   [(set_attr "type" "neon_load1_2reg,load2")
-   (set_attr "fp" "yes,*")]
+   (set_attr "fp" "yes,*")
+   (set_attr "addr_type" "op1")]
 )
 
 (define_insn "load_pairdf"
@@ -1397,7 +1414,8 @@
    ldp\\t%d0, %d2, %1
    ldp\\t%x0, %x2, %1"
   [(set_attr "type" "neon_load1_2reg,load2")
-   (set_attr "fp" "yes,*")]
+   (set_attr "fp" "yes,*")
+   (set_attr "addr_type" "op1")]
 )
 
 ;; Operands 0 and 2 are tied together by the final condition; so we allow
@@ -1415,7 +1433,8 @@
    stp\\t%s1, %s3, %0
    stp\\t%w1, %w3, %0"
   [(set_attr "type" "neon_store1_2reg,store2")
-   (set_attr "fp" "yes,*")]
+   (set_attr "fp" "yes,*")
+   (set_attr "addr_type" "op0")]
 )
 
 (define_insn "store_pairdf"
@@ -1431,7 +1450,8 @@
    stp\\t%d1, %d3, %0
    stp\\t%x1, %x3, %0"
   [(set_attr "type" "neon_store1_2reg,store2")
-   (set_attr "fp" "yes,*")]
+   (set_attr "fp" "yes,*")
+   (set_attr "addr_type" "op0")]
 )
 
 ;; Load pair with post-index writeback.  This is primarily used in function
@@ -1448,7 +1468,8 @@
                   (match_operand:P 5 "const_int_operand" "n"))))])]
   "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
   "ldp\\t%<w>2, %<w>3, [%1], %4"
-  [(set_attr "type" "load2")]
+  [(set_attr "type" "load2")
+   (set_attr "addr_type" "wb")]
 )
 
 (define_insn "loadwb_pair<GPF:mode>_<P:mode>"
@@ -1481,7 +1502,8 @@
                   (match_operand:GPI 3 "register_operand" "r"))])]
   "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
   "stp\\t%<w>2, %<w>3, [%0, %4]!"
-  [(set_attr "type" "store2")]
+  [(set_attr "type" "store2")
+   (set_attr "addr_type" "wb")]
 )
 
 (define_insn "storewb_pair<GPF:mode>_<P:mode>"
@@ -1517,7 +1539,8 @@
   "@
    sxtw\t%0, %w1
    ldrsw\t%0, %1"
-  [(set_attr "type" "extend,load1")]
+  [(set_attr "type" "extend,load1")
+   (set_attr "addr_type" "*,op1")]
 )
 
 (define_insn "*load_pair_extendsidi2_aarch64"
@@ -1530,7 +1553,8 @@
                                    XEXP (operands[1], 0),
                                    GET_MODE_SIZE (SImode)))"
   "ldpsw\\t%0, %2, %1"
-  [(set_attr "type" "load2")]
+  [(set_attr "type" "load2")
+   (set_attr "addr_type" "op1")]
 )
 
 (define_insn "*zero_extendsidi2_aarch64"
@@ -1540,7 +1564,8 @@
   "@
    uxtw\t%0, %w1
    ldr\t%w0, %1"
-  [(set_attr "type" "extend,load1")]
+  [(set_attr "type" "extend,load1")
+   (set_attr "addr_type" "*,op1")]
 )
 
 (define_insn "*load_pair_zero_extendsidi2_aarch64"
@@ -1553,7 +1578,8 @@
                                    XEXP (operands[1], 0),
                                    GET_MODE_SIZE (SImode)))"
   "ldp\\t%w0, %w2, %1"
-  [(set_attr "type" "load2")]
+  [(set_attr "type" "load2")
+   (set_attr "addr_type" "op1")]
 )
 
 (define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
@@ -1569,7 +1595,8 @@
   "@
    sxt<SHORT:size>\t%<GPI:w>0, %w1
    ldrs<SHORT:size>\t%<GPI:w>0, %1"
-  [(set_attr "type" "extend,load1")]
+  [(set_attr "type" "extend,load1")
+   (set_attr "addr_type" "*,op1")]
 )
 
 (define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
@@ -1580,7 +1607,8 @@
    uxt<SHORT:size>\t%<GPI:w>0, %w1
    ldr<SHORT:size>\t%w0, %1
    ldr\t%<SHORT:size>0, %1"
-  [(set_attr "type" "extend,load1,load1")]
+  [(set_attr "type" "extend,load1,load1")
+   (set_attr "addr_type" "*,op1,op1")]
 )
 
 (define_expand "<optab>qihi2"
@@ -4983,7 +5011,8 @@
                    UNSPEC_GOTSMALLPIC))]
   ""
   "ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load1")
+   (set_attr "addr_type" "lo_sum")]
 )
 
 (define_insn "ldr_got_small_sidi"
@@ -4995,7 +5024,8 @@
                     UNSPEC_GOTSMALLPIC)))]
   "TARGET_ILP32"
   "ldr\\t%w0, [%1, #:got_lo12:%a2]"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load1")
+   (set_attr "addr_type" "lo_sum")]
 )
 
 (define_insn "ldr_got_small_28k_<mode>"
@@ -5027,7 +5057,8 @@
                    UNSPEC_GOTTINYPIC))]
   ""
   "ldr\\t%0, %L1"
-  [(set_attr "type" "load1")]
+  [(set_attr "type" "load1")
+   (set_attr "addr_type" "op1addr")]
 )
 
 (define_insn "aarch64_load_tp_hard"
@@ -5069,7 +5100,8 @@
   ""
   "adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, #%L1]"
   [(set_attr "type" "load1")
-   (set_attr "length" "8")]
+   (set_attr "length" "8")
+   (set_attr "addr_type" "op1addr")]
 )
 
 (define_insn "tlsie_small_sidi"
@@ -5080,7 +5112,8 @@
   ""
   "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]"
   [(set_attr "type" "load1")
-   (set_attr "length" "8")]
+   (set_attr "length" "8")
+   (set_attr "addr_type" "op1addr")]
 )
 
 (define_insn "tlsie_tiny_<mode>"
diff --git a/gcc/config/aarch64/vulcan.md b/gcc/config/aarch64/vulcan.md
new file mode 100644
index 0000000..db015d6
--- /dev/null
+++ b/gcc/config/aarch64/vulcan.md
@@ -0,0 +1,619 @@
+;; Broadcom Vulcan pipeline description
+;; Copyright (C) 2015 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "vulcan")
+
+(define_cpu_unit "vulcan_i0" "vulcan")
+(define_cpu_unit "vulcan_i1" "vulcan")
+(define_cpu_unit "vulcan_i2" "vulcan")
+(define_cpu_unit "vulcan_ls0" "vulcan")
+(define_cpu_unit "vulcan_ls1" "vulcan")
+(define_cpu_unit "vulcan_sd" "vulcan")
+
+; Pseudo-units for multiply pipeline.
+
+(define_cpu_unit "vulcan_i1m1" "vulcan")
+(define_cpu_unit "vulcan_i1m2" "vulcan")
+(define_cpu_unit "vulcan_i1m3" "vulcan")
+
+; Pseudo-units for load delay (assuming dcache hit).
+
+(define_cpu_unit "vulcan_ls0d1" "vulcan")
+(define_cpu_unit "vulcan_ls0d2" "vulcan")
+(define_cpu_unit "vulcan_ls0d3" "vulcan")
+
+(define_cpu_unit "vulcan_ls1d1" "vulcan")
+(define_cpu_unit "vulcan_ls1d2" "vulcan")
+(define_cpu_unit "vulcan_ls1d3" "vulcan")
+
+; Make some aliases for f0/f1.
+(define_reservation "vulcan_f0" "vulcan_i0")
+(define_reservation "vulcan_f1" "vulcan_i1")
+
+(define_reservation "vulcan_i012" "vulcan_i0|vulcan_i1|vulcan_i2")
+(define_reservation "vulcan_ls01" "vulcan_ls0|vulcan_ls1")
+(define_reservation "vulcan_f01" "vulcan_f0|vulcan_f1")
+
+(define_reservation "vulcan_ls_both" "vulcan_ls0+vulcan_ls1")
+
+; A load with delay in the ls0/ls1 pipes.
+(define_reservation "vulcan_l0delay" "vulcan_ls0,vulcan_ls0d1,vulcan_ls0d2,\
+                                      vulcan_ls0d3")
+(define_reservation "vulcan_l1delay" "vulcan_ls1,vulcan_ls1d1,vulcan_ls1d2,\
+                                      vulcan_ls1d3")
+(define_reservation "vulcan_l01delay" "vulcan_l0delay|vulcan_l1delay")
+
+;; Branch and call instructions.
+
+(define_insn_reservation "vulcan_branch" 1
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "call,branch"))
+  "vulcan_i2")
+
+;; Integer arithmetic/logic instructions.
+
+; Plain register moves are handled by renaming, and don't create any uops.
+
+(define_insn_reservation "vulcan_regmove" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "mov_reg"))
+  "nothing")
+
+(define_insn_reservation "vulcan_alu_basic" 1
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\
+                        adc_reg,adc_imm,adcs_reg,adcs_imm,\
+                        logic_reg,logic_imm,logics_reg,logics_imm,\
+                        csel,adr,mov_imm,shift_reg,shift_imm,bfm,\
+                        rbit,rev,extend"))
+  "vulcan_i012")
+
+(define_insn_reservation "vulcan_alu_shift" 2
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "alu_shift_imm,alu_ext,alu_shift_reg,\
+                        alus_shift_imm,alus_ext,alus_shift_reg,\
+                        logic_shift_imm,logics_shift_reg"))
+  "vulcan_i012,vulcan_i012")
+
+; NOTE: 13 is the minimum latency given.  Use average or max instead?
+(define_insn_reservation "vulcan_div" 13
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "sdiv,udiv"))
+  "vulcan_i1*13")
+
+(define_insn_reservation "vulcan_madd" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "mla,smlal,umlal"))
+  "vulcan_i1,vulcan_i1m1,vulcan_i1m2,vulcan_i1m3,vulcan_i012")
+
+; NOTE: smull, umull are used for "high part" multiplies too.
+(define_insn_reservation "vulcan_mul" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "mul,smull,umull"))
+  "vulcan_i1,vulcan_i1m1,vulcan_i1m2,vulcan_i1m3")
+
+(define_insn_reservation "vulcan_countbits" 3
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "clz"))
+  "vulcan_i1")
+
+;; Integer loads and stores.
+
+(define_insn_reservation "vulcan_load_basic" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+                                              | AARCH64_ADDR_REG_IMM
+                                              | AARCH64_ADDR_LO_SUM)"))
+  "vulcan_ls01")
+
+(define_insn_reservation "vulcan_load_automod" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "vulcan_ls01,vulcan_i012")
+
+(define_insn_reservation "vulcan_load_regoffset" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)"))
+  "vulcan_i012,vulcan_ls01")
+
+(define_insn_reservation "vulcan_load_scale_ext" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT
+                                              | AARCH64_ADDR_REG_EXT
+                                              | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "vulcan_i012,vulcan_i012,vulcan_ls01")
+
+(define_insn_reservation "vulcan_loadpair" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "load2"))
+  "vulcan_i012,vulcan_ls01")
+
+(define_insn_reservation "vulcan_store_basic" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+                                              | AARCH64_ADDR_REG_IMM
+                                              | AARCH64_ADDR_LO_SUM)"))
+  "vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_store_automod" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "vulcan_ls01,(vulcan_sd+vulcan_i012)")
+
+(define_insn_reservation "vulcan_store_regoffset_scale_ext" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
+                                              | AARCH64_ADDR_REG_SHIFT
+                                              | AARCH64_ADDR_REG_EXT
+                                              | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "vulcan_i012,vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_storepair_basic" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "store2")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+                                              | AARCH64_ADDR_LO_SUM)"))
+  "vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_storepair_automod" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "store2")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "vulcan_ls01,(vulcan_sd+vulcan_i012)")
+
+;; FP data processing instructions.
+
+(define_insn_reservation "vulcan_fp_simple" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_addsub" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "fadds,faddd"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_cmp" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "fcmps,fcmpd"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_divsqrt_s" 16
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "fdivs,fsqrts"))
+  "vulcan_f0*8|vulcan_f1*8")
+
+(define_insn_reservation "vulcan_fp_divsqrt_d" 23
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "fdivd,fsqrtd"))
+  "vulcan_f0*12|vulcan_f1*12")
+
+(define_insn_reservation "vulcan_fp_mul_mac" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "fmuls,fmuld,fmacs,fmacd"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_frint" 7
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_rints,f_rintd"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_fcsel" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "fcsel"))
+  "vulcan_f01")
+
+;; FP miscellaneous instructions.
+
+(define_insn_reservation "vulcan_fp_cvt" 7
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_mov" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_mov_to_gen" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_mcr"))
+  "vulcan_f01")
+
+;; FP loads and stores.
+
+(define_insn_reservation "vulcan_fp_load_basic" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+                                              | AARCH64_ADDR_REG_IMM
+                                              | AARCH64_ADDR_LO_SUM)"))
+  "vulcan_ls01")
+
+(define_insn_reservation "vulcan_fp_load_automod" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "vulcan_ls01,vulcan_i012")
+
+(define_insn_reservation "vulcan_fp_load_regoffset" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)"))
+  "vulcan_ls01,vulcan_i012")
+
+(define_insn_reservation "vulcan_fp_load_scale_ext" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT
+                                              | AARCH64_ADDR_REG_EXT
+                                              | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "vulcan_ls01,vulcan_i012")
+
+(define_insn_reservation "vulcan_fp_loadpair_basic" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+                                              | AARCH64_ADDR_LO_SUM)"))
+  "vulcan_ls01*2")
+
+(define_insn_reservation "vulcan_fp_loadpair_automod" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "vulcan_ls01,(vulcan_ls01+vulcan_i012)")
+
+(define_insn_reservation "vulcan_fp_store_basic" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+                                              | AARCH64_ADDR_REG_IMM
+                                              | AARCH64_ADDR_LO_SUM)"))
+  "vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_fp_store_automod" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "vulcan_ls01,(vulcan_sd+vulcan_i012)")
+
+(define_insn_reservation "vulcan_fp_store_regoffset_scale_ext" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
+                                              | AARCH64_ADDR_REG_SHIFT
+                                              | AARCH64_ADDR_REG_EXT
+                                              | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "vulcan_i012,vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_fp_storepair_basic" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+                                              | AARCH64_ADDR_LO_SUM)"))
+  "vulcan_ls01,(vulcan_ls01+vulcan_sd),vulcan_sd")
+
+(define_insn_reservation "vulcan_fp_storepair_automod" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "vulcan_ls01,(vulcan_ls01+vulcan_sd+vulcan_i012),vulcan_sd")
+
+;; ASIMD integer instructions.
+
+(define_insn_reservation "vulcan_asimd_int" 7
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_abd,neon_abd_q,\
+                        neon_arith_acc,neon_arith_acc_q,\
+                        neon_abs,neon_abs_q,\
+                        neon_add,neon_add_q,\
+                        neon_neg,neon_neg_q,\
+                        neon_add_long,neon_add_widen,\
+                        neon_add_halve,neon_add_halve_q,\
+                        neon_sub_long,neon_sub_widen,\
+                        neon_sub_halve,neon_sub_halve_q,\
+                        neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
+                        neon_qabs,neon_qabs_q,\
+                        neon_qadd,neon_qadd_q,\
+                        neon_qneg,neon_qneg_q,\
+                        neon_qsub,neon_qsub_q,\
+                        neon_minmax,neon_minmax_q,\
+                        neon_reduc_minmax,neon_reduc_minmax_q,\
+                        neon_mul_b,neon_mul_h,neon_mul_s,\
+                        neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\
+                        neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\
+                        neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\
+                        neon_mla_b,neon_mla_h,neon_mla_s,\
+                        neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\
+                        neon_mul_b_long,neon_mul_h_long,\
+                        neon_mul_s_long,neon_mul_d_long,\
+                        neon_sat_mul_b_long,neon_sat_mul_h_long,\
+                        neon_sat_mul_s_long,\
+                        neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
+                        neon_sat_mla_b_long,neon_sat_mla_h_long,\
+                        neon_sat_mla_s_long,\
+                        neon_shift_acc,neon_shift_acc_q,\
+                        neon_shift_imm,neon_shift_imm_q,\
+                        neon_shift_reg,neon_shift_reg_q,\
+                        neon_shift_imm_long,neon_shift_imm_narrow_q,\
+                        neon_sat_shift_imm,neon_sat_shift_imm_q,\
+                        neon_sat_shift_reg,neon_sat_shift_reg_q,\
+                        neon_sat_shift_imm_narrow_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_reduc_add" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_reduc_add,neon_reduc_add_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_cmp" 7
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\
+                        neon_tst,neon_tst_q"))
+  "vulcan_f01")
+
+; Note: logical AND should have a latency of 7, not 5.
+
+(define_insn_reservation "vulcan_asimd_logic" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_logic,neon_logic_q"))
+  "vulcan_f01")
+
+;; ASIMD floating-point instructions.
+
+(define_insn_reservation "vulcan_asimd_fp_simple" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\
+                        neon_fp_abs_s_q,neon_fp_abs_d_q,\
+                        neon_fp_compare_s,neon_fp_compare_d,\
+                        neon_fp_compare_s_q,neon_fp_compare_d_q,\
+                        neon_fp_minmax_s,neon_fp_minmax_d,\
+                        neon_fp_minmax_s_q,neon_fp_minmax_d_q,\
+                        neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\
+                        neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\
+                        neon_fp_neg_s,neon_fp_neg_d,\
+                        neon_fp_neg_s_q,neon_fp_neg_d_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_fp_arith" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
+                        neon_fp_abd_s_q,neon_fp_abd_d_q,\
+                        neon_fp_addsub_s,neon_fp_addsub_d,\
+                        neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
+                        neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
+                        neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q,\
+                        neon_fp_mul_s,neon_fp_mul_d,\
+                        neon_fp_mul_s_q,neon_fp_mul_d_q,\
+                        neon_fp_mla_s,neon_fp_mla_d,\
+                        neon_fp_mla_s_q,neon_fp_mla_d_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_fp_conv" 7
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\
+                        neon_fp_to_int_s,neon_fp_to_int_d,\
+                        neon_fp_to_int_s_q,neon_fp_to_int_d_q,\
+                        neon_fp_round_s,neon_fp_round_d,\
+                        neon_fp_round_s_q,neon_fp_round_d_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_fp_div_s" 16
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_fp_div_d" 23
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q"))
+  "vulcan_f01")
+
+;; ASIMD miscellaneous instructions.
+
+(define_insn_reservation "vulcan_asimd_misc" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_rbit,\
+                        neon_bsl,neon_bsl_q,\
+                        neon_cls,neon_cls_q,\
+                        neon_cnt,neon_cnt_q,\
+                        neon_from_gp,neon_from_gp_q,\
+                        neon_dup,neon_dup_q,\
+                        neon_ext,neon_ext_q,\
+                        neon_ins,neon_ins_q,\
+                        neon_move,neon_move_q,\
+                        neon_fp_recpe_s,neon_fp_recpe_d,\
+                        neon_fp_recpe_s_q,neon_fp_recpe_d_q,\
+                        neon_fp_recpx_s,neon_fp_recpx_d,\
+                        neon_fp_recpx_s_q,neon_fp_recpx_d_q,\
+                        neon_rev,neon_rev_q,\
+                        neon_dup,neon_dup_q,\
+                        neon_permute,neon_permute_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_recip_step" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\
+                        neon_fp_recps_d,neon_fp_recps_d_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_lut" 8
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_tbl1,neon_tbl1_q,neon_tbl2_q"))
+  "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_elt_to_gr" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+  "vulcan_f01")
+
+;; ASIMD load instructions.
+
+; NOTE: These reservations attempt to model latency and throughput correctly,
+; but the cycle timing of unit allocation is not necessarily accurate (because
+; insns are split into uops, and those may be issued out-of-order).
+
+(define_insn_reservation "vulcan_asimd_load1_1_mult" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q"))
+  "vulcan_ls01")
+
+(define_insn_reservation "vulcan_asimd_load1_2_mult" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+  "vulcan_ls_both")
+
+(define_insn_reservation "vulcan_asimd_load1_3_mult" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+  "(vulcan_ls_both,vulcan_ls01)|(vulcan_ls01,vulcan_ls_both)")
+
+(define_insn_reservation "vulcan_asimd_load1_4_mult" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+  "vulcan_ls_both*2")
+
+(define_insn_reservation "vulcan_asimd_load1_onelane" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q"))
+  "vulcan_l01delay,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load1_all" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q"))
+  "vulcan_l01delay,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load2" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\
+                        neon_load2_one_lane,neon_load2_one_lane_q,\
+                        neon_load2_all_lanes,neon_load2_all_lanes_q"))
+  "(vulcan_l0delay,vulcan_f01)|(vulcan_l1delay,vulcan_f01)")
+
+(define_insn_reservation "vulcan_asimd_load3_mult" 8
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q"))
+  "vulcan_ls_both*3,(vulcan_ls0d1+vulcan_ls1d1),(vulcan_ls0d2+vulcan_ls1d2),\
+   (vulcan_ls0d3+vulcan_ls1d3),vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load3_elts" 7
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load3_one_lane,neon_load3_one_lane_q,\
+                        neon_load3_all_lanes,neon_load3_all_lanes_q"))
+  "vulcan_ls_both,vulcan_l01delay,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load4_mult" 8
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q"))
+  "vulcan_ls_both*4,(vulcan_ls0d1+vulcan_ls1d1),(vulcan_ls0d2+vulcan_ls1d2),\
+   (vulcan_ls0d3+vulcan_ls1d3),vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load4_elts" 6
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane_q,\
+                        neon_load4_all_lanes,neon_load4_all_lanes_q"))
+  "vulcan_ls_both*2,(vulcan_ls0d1+vulcan_ls1d1),(vulcan_ls0d2+vulcan_ls1d2),\
+   (vulcan_ls0d3+vulcan_ls1d3),vulcan_f01")
+
+;; ASIMD store instructions.
+
+; Same note applies as for ASIMD load instructions.
+
+(define_insn_reservation "vulcan_asimd_store1_1_mult" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q"))
+  "vulcan_ls01")
+
+(define_insn_reservation "vulcan_asimd_store1_2_mult" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q"))
+  "vulcan_ls_both")
+
+(define_insn_reservation "vulcan_asimd_store1_3_mult" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q"))
+  "(vulcan_ls_both,vulcan_ls01)|(vulcan_ls01,vulcan_ls_both)")
+
+(define_insn_reservation "vulcan_asimd_store1_4_mult" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q"))
+  "vulcan_ls_both*2")
+
+(define_insn_reservation "vulcan_asimd_store1_onelane" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q"))
+  "vulcan_ls01,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store2_mult" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q"))
+  "vulcan_ls_both,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store2_onelane" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q"))
+  "vulcan_ls01,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store3_mult" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q"))
+  "vulcan_ls_both*3,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store3_onelane" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store3_one_lane,neon_store3_one_lane_q"))
+  "vulcan_ls_both,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store4_mult" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q"))
+  "vulcan_ls_both*4,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store4_onelane" 0
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "neon_store4_one_lane,neon_store4_one_lane_q"))
+  "vulcan_ls_both,vulcan_f01")
+
+;; Crypto extensions.
+
+; FIXME: Forwarding path for aese/aesmc or aesd/aesimc pairs?
+
+(define_insn_reservation "vulcan_aes" 5
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "crypto_aese,crypto_aesmc"))
+  "vulcan_f1")
+
+(define_insn_reservation "vulcan_sha" 7
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\
+                        crypto_sha256_fast,crypto_sha256_slow"))
+  "vulcan_f1")
+
+;; CRC extension.
+
+(define_insn_reservation "vulcan_crc" 4
+  (and (eq_attr "tune" "vulcan")
+       (eq_attr "type" "crc"))
+  "vulcan_i1")
-- 
2.4.11