Oh sure, I've now successfully bootstrapped on arm-linux-gnueabihf and aarch64-unknown-linux-gnu.
Dominik > On 10 Nov 2017, at 10:53, Kyrill Tkachov <kyrylo.tkac...@foss.arm.com> wrote: > > Hi Dominic, > > On 10/11/17 09:36, Dominik Inführ wrote: >> Hi, >> >> this patch tries to refine the instruction scheduling model for X-Gene. >> Improved performance for 456.hmmer and 464.h264ref (about 1%). Also splits >> the model into multiple automatons, therefore smaller binary and faster >> build time. Survives bootstrap. >> >> Best, >> Dominik > > The changes look ok to me, but as the description is shared between the arm > and aarch64 ports can you please also do a sanity check > by building (and preferably bootstrapping) an arm compiler? > > Thanks, > Kyrill > >> gcc/ChangeLog: >> 2017-10-09 Dominik Infuehr <dominik.infu...@theobroma-systems.com> >> >> * config/arm/xgene1.md (xgene1): Split into automatons >> xgene1_main, xgene1_decoder, xgene1_div, xgene1_simd. >> (xgene1_f_load): Adjust reservations and/or types. >> (xgene1_f_store): Likewise. >> (xgene1_load_pair): Likewise. >> (xgene1_store_pair): Likewise. >> (xgene1_fp_load1): Likewise. >> (xgene1_load1): Likewise. >> (xgene1_store1): Likewise. >> (xgene1_move): Likewise. >> (xgene1_alu): Likewise. >> (xgene1_simd): Likewise. >> (xgene1_bfm): Likewise. >> (xgene1_neon_load1): Likewise. >> (xgene1_neon_store1): Likewise. >> (xgene1_neon_logic): Likewise. >> (xgene1_neon_st1): Likewise. >> (xgene1_neon_ld1r): Likewise. >> (xgene1_alu_cond): Added. >> (xgene1_shift_rotate): Likewise. >> (xgene1_bfx): Likewise. >> (xgene1_mul): Split into xgene1_mul32, xgene1_mul64. 
>> >> — >> diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md >> index c4b3773..cf0694a 100644 >> --- a/gcc/config/arm/xgene1.md >> +++ b/gcc/config/arm/xgene1.md >> @@ -20,17 +20,26 @@ >> >> ;; Pipeline description for the xgene1 micro-architecture >> >> -(define_automaton "xgene1") >> +(define_automaton "xgene1_main, xgene1_decoder, xgene1_div, xgene1_simd") >> >> -(define_cpu_unit "xgene1_decode_out0" "xgene1") >> -(define_cpu_unit "xgene1_decode_out1" "xgene1") >> -(define_cpu_unit "xgene1_decode_out2" "xgene1") >> -(define_cpu_unit "xgene1_decode_out3" "xgene1") >> +(define_cpu_unit "xgene1_decode_out0" "xgene1_decoder") >> +(define_cpu_unit "xgene1_decode_out1" "xgene1_decoder") >> +(define_cpu_unit "xgene1_decode_out2" "xgene1_decoder") >> +(define_cpu_unit "xgene1_decode_out3" "xgene1_decoder") >> >> -(define_cpu_unit "xgene1_divide" "xgene1") >> -(define_cpu_unit "xgene1_fp_divide" "xgene1") >> -(define_cpu_unit "xgene1_fsu" "xgene1") >> -(define_cpu_unit "xgene1_fcmp" "xgene1") >> +(define_cpu_unit "xgene1_IXA" "xgene1_main") >> +(define_cpu_unit "xgene1_IXB" "xgene1_main") >> +(define_cpu_unit "xgene1_IXB_compl" "xgene1_main") >> + >> +(define_reservation "xgene1_IXn" "(xgene1_IXA | xgene1_IXB)") >> + >> +(define_cpu_unit "xgene1_multiply" "xgene1_main") >> +(define_cpu_unit "xgene1_divide" "xgene1_div") >> +(define_cpu_unit "xgene1_fp_divide" "xgene1_div") >> +(define_cpu_unit "xgene1_fsu" "xgene1_simd") >> +(define_cpu_unit "xgene1_fcmp" "xgene1_simd") >> +(define_cpu_unit "xgene1_ld" "xgene1_main") >> +(define_cpu_unit "xgene1_st" "xgene1_main") >> >> (define_reservation "xgene1_decode1op" >> "( xgene1_decode_out0 ) >> @@ -68,12 +77,12 @@ >> (define_insn_reservation "xgene1_f_load" 10 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "f_loadd,f_loads")) >> - "xgene1_decode2op") >> + "xgene1_decode2op, xgene1_ld") >> >> (define_insn_reservation "xgene1_f_store" 4 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" 
"f_stored,f_stores")) >> - "xgene1_decode2op") >> + "xgene1_decode2op, xgene1_st") >> >> (define_insn_reservation "xgene1_fmov" 2 >> (and (eq_attr "tune" "xgene1") >> @@ -92,85 +101,108 @@ >> >> (define_insn_reservation "xgene1_load_pair" 6 >> (and (eq_attr "tune" "xgene1") >> - (eq_attr "type" "load_8, load_16")) >> - "xgene1_decodeIsolated") >> + (eq_attr "type" "load_16")) >> + "xgene1_decodeIsolated, xgene1_ld*2") >> >> (define_insn_reservation "xgene1_store_pair" 2 >> (and (eq_attr "tune" "xgene1") >> - (eq_attr "type" "store_8, store_16")) >> - "xgene1_decodeIsolated") >> + (eq_attr "type" "store_16")) >> + "xgene1_decodeIsolated, xgene1_st*2") >> >> (define_insn_reservation "xgene1_fp_load1" 10 >> (and (eq_attr "tune" "xgene1") >> - (eq_attr "type" "load_4") >> + (eq_attr "type" "load_4, load_8") >> (eq_attr "fp" "yes")) >> - "xgene1_decode1op") >> + "xgene1_decode1op, xgene1_ld") >> >> (define_insn_reservation "xgene1_load1" 5 >> (and (eq_attr "tune" "xgene1") >> - (eq_attr "type" "load_4")) >> - "xgene1_decode1op") >> + (eq_attr "type" "load_4, load_8")) >> + "xgene1_decode1op, xgene1_ld") >> >> -(define_insn_reservation "xgene1_store1" 2 >> +(define_insn_reservation "xgene1_store1" 1 >> (and (eq_attr "tune" "xgene1") >> - (eq_attr "type" "store_4")) >> - "xgene1_decode2op") >> + (eq_attr "type" "store_4, store_8")) >> + "xgene1_decode1op, xgene1_st") >> >> (define_insn_reservation "xgene1_move" 1 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "mov_reg,mov_imm,mrs")) >> - "xgene1_decode1op") >> + "xgene1_decode1op, xgene1_IXn") >> + >> +(define_insn_reservation "xgene1_alu_cond" 1 >> + (and (eq_attr "tune" "xgene1") >> + (eq_attr "type" "csel")) >> + "xgene1_decode1op, xgene1_IXn") >> >> (define_insn_reservation "xgene1_alu" 1 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "alu_imm,alu_sreg,alu_shift_imm,\ >> - alu_ext,adc_reg,csel,logic_imm,\ >> + alu_ext,adc_reg,logic_imm,\ >> logic_reg,logic_shift_imm,clz,\ >> - rbit,shift_reg,adr,mov_reg,\ 
>> - mov_imm,extend")) >> - "xgene1_decode1op") >> + rbit,adr,mov_reg,shift_imm,\ >> + mov_imm,extend,multiple")) >> + "xgene1_decode1op, xgene1_IXn") >> + >> +(define_insn_reservation "xgene1_shift_rotate" 2 >> + (and (eq_attr "tune" "xgene1") >> + (eq_attr "type" "shift_reg")) >> + "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl") >> >> -(define_insn_reservation "xgene1_simd" 1 >> +(define_insn_reservation "xgene1_simd" 2 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "rev")) >> - "xgene1_decode1op") >> + "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl") >> >> (define_insn_reservation "xgene1_alus" 1 >> (and (eq_attr "tune" "xgene1") >> - (eq_attr "type" "alus_imm,alu_sreg,alus_shift_imm,\ >> + (eq_attr "type" "alus_imm,alus_sreg,alus_shift_imm,\ >> alus_ext,logics_imm,logics_reg,\ >> logics_shift_imm")) >> - "xgene1_decode1op") >> + "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl") >> + >> +(define_bypass 2 "xgene1_alus" >> + "xgene1_alu_cond, xgene1_branch") >> >> -(define_insn_reservation "xgene1_mul" 6 >> +(define_insn_reservation "xgene1_mul32" 4 >> (and (eq_attr "tune" "xgene1") >> - (eq_attr "type" "mul,mla,smull,umull,smlal,umlal")) >> - "xgene1_decode2op") >> + (eq_attr "mul32" "yes")) >> + "xgene1_decode2op, xgene1_IXB + xgene1_multiply, xgene1_multiply, >> nothing, xgene1_IXB_compl") >> + >> +(define_insn_reservation "xgene1_mul64" 5 >> + (and (eq_attr "tune" "xgene1") >> + (eq_attr "mul64" "yes")) >> + "xgene1_decode2op, xgene1_IXB + xgene1_multiply, xgene1_multiply, >> nothing*2, xgene1_IXB_compl") >> >> (define_insn_reservation "xgene1_div" 34 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "sdiv,udiv")) >> - "xgene1_decode1op,xgene1_divide*7") >> + "xgene1_decode1op, xgene1_IXB + xgene1_divide*7") >> >> (define_insn_reservation "xgene1_fcmp" 10 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "fcmpd,fcmps,fccmpd,fccmps")) >> - "xgene1_decode1op,xgene1_fsu+xgene1_fcmp*3") >> + "xgene1_decode1op, xgene1_fsu + xgene1_fcmp*3") >> >> 
(define_insn_reservation "xgene1_fcsel" 3 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "fcsel")) >> - "xgene1_decode1op,xgene1_fsu") >> + "xgene1_decode1op, xgene1_fsu") >> + >> +(define_insn_reservation "xgene1_bfx" 1 >> + (and (eq_attr "tune" "xgene1") >> + (eq_attr "type" "bfx")) >> + "xgene1_decode1op, xgene1_IXn") >> >> (define_insn_reservation "xgene1_bfm" 2 >> (and (eq_attr "tune" "xgene1") >> - (eq_attr "type" "bfm,bfx")) >> - "xgene1_decode1op,xgene1_fsu") >> + (eq_attr "type" "bfm")) >> + "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl") >> >> (define_insn_reservation "xgene1_f_rint" 5 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "f_rintd,f_rints")) >> - "xgene1_decode1op,xgene1_fsu") >> + "xgene1_decode1op, xgene1_fsu") >> >> (define_insn_reservation "xgene1_f_cvt" 3 >> (and (eq_attr "tune" "xgene1") >> @@ -225,12 +257,12 @@ >> (define_insn_reservation "xgene1_neon_load1" 11 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")) >> - "xgene1_decode2op,xgene1_fsu") >> + "xgene1_decode2op, xgene1_ld") >> >> (define_insn_reservation "xgene1_neon_store1" 5 >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")) >> - "xgene1_decode2op,xgene1_fsu") >> + "xgene1_decode2op, xgene1_st") >> >> (define_insn_reservation "xgene1_neon_logic" 2 >> (and (eq_attr "tune" "xgene1") >> @@ -300,6 +332,8 @@ >> neon_compare_zero_q,\ >> neon_tst,\ >> neon_tst_q,\ >> + neon_minmax,\ >> + neon_minmax_q,\ >> ")) >> "xgene1_decode1op,xgene1_fsu") >> >> @@ -439,8 +473,10 @@ >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "neon_store1_one_lane,\ >> neon_store1_one_lane_q,\ >> + neon_stp,\ >> + neon_stp_q,\ >> ")) >> - "xgene1_decode1op") >> + "xgene1_decodeIsolated, xgene1_st") >> >> (define_insn_reservation "xgene1_neon_halve_narrow" 6 >> (and (eq_attr "tune" "xgene1") >> @@ -499,7 +535,7 @@ >> (and (eq_attr "tune" "xgene1") >> (eq_attr "type" "neon_load1_all_lanes,\ >> ")) >> - 
"xgene1_decode1op") >> + "xgene1_decode1op, xgene1_ld") >> >> (define_insn_reservation "xgene1_neon_fp_recp" 3 >> (and (eq_attr "tune" "xgene1") >> >
signature.asc
Description: Message signed with OpenPGP using GPGMail