On Mon, Jun 12, 2017 at 02:54:00PM +0100, James Greenhalgh wrote: > > Hi, > > There seems to be a partial misconception in the AArch64 backend that > load1/load2 referred to the number of registers to load, rather than the > number of words to load. This patch fixes that using the new "number of > bytes" types added in the previous patch. > > That means using the load_16 and store_16 types that were defined in the > previous patch for the first time in the AArch64 backend. To ensure > continuity for scheduling models, I've just split this out from load_8. > Please update your models if this is very wrong!
I've updated this patch against trunk, rechecked it, and committed it as r252026. Thanks, James --- 2017-09-12 James Greenhalgh <james.greenha...@arm.com> * config/aarch64/aarch64.md (movdi_aarch64): Set load/store types correctly. (movti_aarch64): Likewise. (movdf_aarch64): Likewise. (movtf_aarch64): Likewise. (load_pairdi): Likewise. (store_pairdi): Likewise. (load_pairdf): Likewise. (store_pairdf): Likewise. (loadwb_pair<GPI:mode>_<P:mode>): Likewise. (storewb_pair<GPI:mode>_<P:mode>): Likewise. (ldr_got_small_<mode>): Likewise. (ldr_got_small_28k_<mode>): Likewise. (ldr_got_tiny): Likewise. * config/aarch64/iterators.md (ldst_sz): New. (ldpstp_sz): Likewise. * config/aarch64/thunderx.md (thunderx_storepair): Split store_8 to store_16. (thunderx_load): Split load_8 to load_16. * config/aarch64/thunderx2t99.md (thunderx2t99_loadpair): Split load_8 to load_16. (thunderx2t99_storepair_basic): Split store_8 to store_16. * config/arm/xgene1.md (xgene1_load_pair): Split load_8 to load_16. (xgene1_store_pair): Split store_8 to store_16. * config/aarch64/falkor.md (falkor_ld_3_ld): Split load_8 to load_16. (falkor_st_0_st_sd): Split store_8 to store_16.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 7cbb458..e85376c 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -994,8 +994,8 @@ aarch64_expand_mov_immediate (operands[0], operands[1]); DONE; }" - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,\ - load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_8,\ + load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,neon_move") (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) @@ -1039,7 +1039,8 @@ ldr\\t%q0, %1 str\\t%q1, %0" [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ - load_8,store_8,store_8,f_loadd,f_stored") + load_16,store_16,store_16,\ + load_16,store_16") (set_attr "length" "8,8,8,4,4,4,4,4,4") (set_attr "simd" "*,*,*,yes,*,*,*,*,*") (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")] @@ -1142,7 +1143,7 @@ mov\\t%x0, %x1 mov\\t%x0, %1" [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\ - f_loadd,f_stored,load_4,store_4,mov_reg,\ + f_loadd,f_stored,load_8,store_8,mov_reg,\ fconstd") (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] ) @@ -1187,7 +1188,7 @@ stp\\t%1, %H1, %0 stp\\txzr, xzr, %0" [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ - f_loadd,f_stored,load_8,store_8,store_8") + f_loadd,f_stored,load_16,store_16,store_16") (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")] ) @@ -1251,7 +1252,7 @@ "@ ldp\\t%x0, %x2, %1 ldp\\t%d0, %d2, %1" - [(set_attr "type" "load_8,neon_load1_2reg") + [(set_attr "type" "load_16,neon_load1_2reg") (set_attr "fp" "*,yes")] ) @@ -1286,7 +1287,7 @@ "@ stp\\t%x1, %x3, %0 stp\\t%d1, %d3, %0" - [(set_attr "type" "store_8,neon_store1_2reg") + [(set_attr "type" "store_16,neon_store1_2reg") (set_attr "fp" "*,yes")] ) @@ -1320,7 +1321,7 @@ "@ ldp\\t%d0, %d2, %1 ldp\\t%x0, %x2, %1" 
- [(set_attr "type" "neon_load1_2reg,load_8") + [(set_attr "type" "neon_load1_2reg,load_16") (set_attr "fp" "yes,*")] ) @@ -1354,7 +1355,7 @@ "@ stp\\t%d1, %d3, %0 stp\\t%x1, %x3, %0" - [(set_attr "type" "neon_store1_2reg,store_8") + [(set_attr "type" "neon_store1_2reg,store_16") (set_attr "fp" "yes,*")] ) @@ -1372,7 +1373,7 @@ (match_operand:P 5 "const_int_operand" "n"))))])] "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)" "ldp\\t%<w>2, %<w>3, [%1], %4" - [(set_attr "type" "load_8")] + [(set_attr "type" "load_<ldpstp_sz>")] ) (define_insn "loadwb_pair<GPF:mode>_<P:mode>" @@ -1405,7 +1406,7 @@ (match_operand:GPI 3 "register_operand" "r"))])] "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)" "stp\\t%<w>2, %<w>3, [%0, %4]!" - [(set_attr "type" "store_8")] + [(set_attr "type" "store_<ldpstp_sz>")] ) (define_insn "storewb_pair<GPF:mode>_<P:mode>" @@ -5355,7 +5356,7 @@ UNSPEC_GOTSMALLPIC))] "" "ldr\\t%<w>0, [%1, #:got_lo12:%a2]" - [(set_attr "type" "load_4")] + [(set_attr "type" "load_<ldst_sz>")] ) (define_insn "ldr_got_small_sidi" @@ -5378,7 +5379,7 @@ UNSPEC_GOTSMALLPIC28K))] "" "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]" - [(set_attr "type" "load_4")] + [(set_attr "type" "load_<ldst_sz>")] ) (define_insn "ldr_got_small_28k_sidi" @@ -5399,7 +5400,7 @@ UNSPEC_GOTTINYPIC))] "" "ldr\\t%0, %L1" - [(set_attr "type" "load_4")] + [(set_attr "type" "load_8")] ) (define_insn "aarch64_load_tp_hard" diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md index 66efc8c..83971ce 100644 --- a/gcc/config/aarch64/falkor.md +++ b/gcc/config/aarch64/falkor.md @@ -581,7 +581,7 @@ (define_insn_reservation "falkor_ld_3_ld" 3 (and (eq_attr "tune" "falkor") - (eq_attr "type" "load_4,load_8")) + (eq_attr "type" "load_4,load_8,load_16")) "falkor_ld") ;; Miscellaneous Data-Processing Instructions @@ -663,7 +663,7 @@ (define_insn_reservation "falkor_st_0_st_sd" 0 (and (eq_attr "tune" "falkor") - (eq_attr "type" "store_4,store_8")) + 
(eq_attr "type" "store_4,store_8,store_16")) "falkor_st+falkor_sd") ;; Muliply bypasses. diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 3e38767..477dc35 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -387,6 +387,11 @@ ;; 32-bit version and "%x0" in the 64-bit version. (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) +;; The size of access, in bytes. +(define_mode_attr ldst_sz [(SI "4") (DI "8")]) +;; Likewise for load/store pair. +(define_mode_attr ldpstp_sz [(SI "8") (DI "16")]) + ;; For inequal width int to float conversion (define_mode_attr w1 [(HF "w") (SF "w") (DF "x")]) (define_mode_attr w2 [(HF "x") (SF "x") (DF "w")]) diff --git a/gcc/config/aarch64/thunderx.md b/gcc/config/aarch64/thunderx.md index c18da2f..84ac6cd 100644 --- a/gcc/config/aarch64/thunderx.md +++ b/gcc/config/aarch64/thunderx.md @@ -100,7 +100,7 @@ ;; Store pair are single issued (define_insn_reservation "thunderx_storepair" 1 (and (eq_attr "tune" "thunderx") - (eq_attr "type" "store_8")) + (eq_attr "type" "store_8,store_16")) "thunderx_pipe0 + thunderx_pipe1") ;; Prefetch are single issued @@ -112,7 +112,7 @@ ;; loads (and load pairs) from L1 take 3 cycles in pipe 0 (define_insn_reservation "thunderx_load" 3 (and (eq_attr "tune" "thunderx") - (eq_attr "type" "load_4, load_8")) + (eq_attr "type" "load_4, load_8, load_16")) "thunderx_pipe0") (define_insn_reservation "thunderx_brj" 1 diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md index 41a45ca..5bcf4ff 100644 --- a/gcc/config/aarch64/thunderx2t99.md +++ b/gcc/config/aarch64/thunderx2t99.md @@ -128,7 +128,7 @@ (define_insn_reservation "thunderx2t99_loadpair" 5 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "load_8")) + (eq_attr "type" "load_8,load_16")) "thunderx2t99_i012,thunderx2t99_ls01") (define_insn_reservation "thunderx2t99_store_basic" 1 @@ -138,7 +138,7 @@ (define_insn_reservation 
"thunderx2t99_storepair_basic" 1 (and (eq_attr "tune" "thunderx2t99") - (eq_attr "type" "store_8")) + (eq_attr "type" "store_8,store_16")) "thunderx2t99_ls01,thunderx2t99_sd") ;; FP data processing instructions. diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md index d0b17ab..c4b3773 100644 --- a/gcc/config/arm/xgene1.md +++ b/gcc/config/arm/xgene1.md @@ -92,12 +92,12 @@ (define_insn_reservation "xgene1_load_pair" 6 (and (eq_attr "tune" "xgene1") - (eq_attr "type" "load_8")) + (eq_attr "type" "load_8, load_16")) "xgene1_decodeIsolated") (define_insn_reservation "xgene1_store_pair" 2 (and (eq_attr "tune" "xgene1") - (eq_attr "type" "store_8")) + (eq_attr "type" "store_8, store_16")) "xgene1_decodeIsolated") (define_insn_reservation "xgene1_fp_load1" 10