Could you please commit it for me? I don’t have commit rights. Thanks,
Dominik > On 13 Nov 2017, at 12:27, Kyrill Tkachov <kyrylo.tkac...@foss.arm.com> wrote: > > > On 13/11/17 11:09, Dominik Inführ wrote: >> Oh sure, I've now successfully bootstrapped on arm-linux-gnueabihf and >> aarch64-unknown-linux-gnu. >> >> Dominik >> > > Thanks Dominik, > > This is ok for trunk. > > Kyrill > >>> On 10 Nov 2017, at 10:53, Kyrill Tkachov <kyrylo.tkac...@foss.arm.com> >>> wrote: >>> >>> Hi Dominik, >>> >>> On 10/11/17 09:36, Dominik Inführ wrote: >>>> Hi, >>>> >>>> this patch tries to refine the instruction scheduling model for X-Gene. >>>> Improved performance for 456.hmmer and 464.h264ref (about 1%). Also splits >>>> the model into multiple automatons, therefore smaller binary and faster >>>> build time. Survives bootstrap. >>>> >>>> Best, >>>> Dominik >>> The changes look ok to me, but as the description is shared between the arm >>> and aarch64 ports can you please also do a sanity check >>> by building (and preferably bootstrapping) an arm compiler? >>> >>> Thanks, >>> Kyrill >>> >>>> gcc/ChangeLog: >>>> 2017-10-09 Dominik Infuehr <dominik.infu...@theobroma-systems.com> >>>> >>>> * config/arm/xgene1.md (xgene1): Split into automatons >>>> xgene1_main, xgene1_decoder, xgene1_div, xgene1_simd. >>>> (xgene1_f_load): Adjust reservations and/or types. >>>> (xgene1_f_store): Likewise. >>>> (xgene1_load_pair): Likewise. >>>> (xgene1_store_pair): Likewise. >>>> (xgene1_fp_load1): Likewise. >>>> (xgene1_load1): Likewise. >>>> (xgene1_store1): Likewise. >>>> (xgene1_move): Likewise. >>>> (xgene1_alu): Likewise. >>>> (xgene1_simd): Likewise. >>>> (xgene1_bfm): Likewise. >>>> (xgene1_neon_load1): Likewise. >>>> (xgene1_neon_store1): Likewise. >>>> (xgene1_neon_logic): Likewise. >>>> (xgene1_neon_st1): Likewise. >>>> (xgene1_neon_ld1r): Likewise. >>>> (xgene1_alu_cond): Added. >>>> (xgene1_shift_reg): Likewise. >>>> (xgene1_bfx): Likewise. >>>> (xgene1_mul): Split into xgene1_mul32, xgene1_mul64. 
>>>> >>>> — >>>> diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md >>>> index c4b3773..cf0694a 100644 >>>> --- a/gcc/config/arm/xgene1.md >>>> +++ b/gcc/config/arm/xgene1.md >>>> @@ -20,17 +20,26 @@ >>>> >>>> ;; Pipeline description for the xgene1 micro-architecture >>>> >>>> -(define_automaton "xgene1") >>>> +(define_automaton "xgene1_main, xgene1_decoder, xgene1_div, xgene1_simd") >>>> >>>> -(define_cpu_unit "xgene1_decode_out0" "xgene1") >>>> -(define_cpu_unit "xgene1_decode_out1" "xgene1") >>>> -(define_cpu_unit "xgene1_decode_out2" "xgene1") >>>> -(define_cpu_unit "xgene1_decode_out3" "xgene1") >>>> +(define_cpu_unit "xgene1_decode_out0" "xgene1_decoder") >>>> +(define_cpu_unit "xgene1_decode_out1" "xgene1_decoder") >>>> +(define_cpu_unit "xgene1_decode_out2" "xgene1_decoder") >>>> +(define_cpu_unit "xgene1_decode_out3" "xgene1_decoder") >>>> >>>> -(define_cpu_unit "xgene1_divide" "xgene1") >>>> -(define_cpu_unit "xgene1_fp_divide" "xgene1") >>>> -(define_cpu_unit "xgene1_fsu" "xgene1") >>>> -(define_cpu_unit "xgene1_fcmp" "xgene1") >>>> +(define_cpu_unit "xgene1_IXA" "xgene1_main") >>>> +(define_cpu_unit "xgene1_IXB" "xgene1_main") >>>> +(define_cpu_unit "xgene1_IXB_compl" "xgene1_main") >>>> + >>>> +(define_reservation "xgene1_IXn" "(xgene1_IXA | xgene1_IXB)") >>>> + >>>> +(define_cpu_unit "xgene1_multiply" "xgene1_main") >>>> +(define_cpu_unit "xgene1_divide" "xgene1_div") >>>> +(define_cpu_unit "xgene1_fp_divide" "xgene1_div") >>>> +(define_cpu_unit "xgene1_fsu" "xgene1_simd") >>>> +(define_cpu_unit "xgene1_fcmp" "xgene1_simd") >>>> +(define_cpu_unit "xgene1_ld" "xgene1_main") >>>> +(define_cpu_unit "xgene1_st" "xgene1_main") >>>> >>>> (define_reservation "xgene1_decode1op" >>>> "( xgene1_decode_out0 ) >>>> @@ -68,12 +77,12 @@ >>>> (define_insn_reservation "xgene1_f_load" 10 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "f_loadd,f_loads")) >>>> - "xgene1_decode2op") >>>> + "xgene1_decode2op, xgene1_ld") >>>> >>>> 
(define_insn_reservation "xgene1_f_store" 4 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "f_stored,f_stores")) >>>> - "xgene1_decode2op") >>>> + "xgene1_decode2op, xgene1_st") >>>> >>>> (define_insn_reservation "xgene1_fmov" 2 >>>> (and (eq_attr "tune" "xgene1") >>>> @@ -92,85 +101,108 @@ >>>> >>>> (define_insn_reservation "xgene1_load_pair" 6 >>>> (and (eq_attr "tune" "xgene1") >>>> - (eq_attr "type" "load_8, load_16")) >>>> - "xgene1_decodeIsolated") >>>> + (eq_attr "type" "load_16")) >>>> + "xgene1_decodeIsolated, xgene1_ld*2") >>>> >>>> (define_insn_reservation "xgene1_store_pair" 2 >>>> (and (eq_attr "tune" "xgene1") >>>> - (eq_attr "type" "store_8, store_16")) >>>> - "xgene1_decodeIsolated") >>>> + (eq_attr "type" "store_16")) >>>> + "xgene1_decodeIsolated, xgene1_st*2") >>>> >>>> (define_insn_reservation "xgene1_fp_load1" 10 >>>> (and (eq_attr "tune" "xgene1") >>>> - (eq_attr "type" "load_4") >>>> + (eq_attr "type" "load_4, load_8") >>>> (eq_attr "fp" "yes")) >>>> - "xgene1_decode1op") >>>> + "xgene1_decode1op, xgene1_ld") >>>> >>>> (define_insn_reservation "xgene1_load1" 5 >>>> (and (eq_attr "tune" "xgene1") >>>> - (eq_attr "type" "load_4")) >>>> - "xgene1_decode1op") >>>> + (eq_attr "type" "load_4, load_8")) >>>> + "xgene1_decode1op, xgene1_ld") >>>> >>>> -(define_insn_reservation "xgene1_store1" 2 >>>> +(define_insn_reservation "xgene1_store1" 1 >>>> (and (eq_attr "tune" "xgene1") >>>> - (eq_attr "type" "store_4")) >>>> - "xgene1_decode2op") >>>> + (eq_attr "type" "store_4, store_8")) >>>> + "xgene1_decode1op, xgene1_st") >>>> >>>> (define_insn_reservation "xgene1_move" 1 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "mov_reg,mov_imm,mrs")) >>>> - "xgene1_decode1op") >>>> + "xgene1_decode1op, xgene1_IXn") >>>> + >>>> +(define_insn_reservation "xgene1_alu_cond" 1 >>>> + (and (eq_attr "tune" "xgene1") >>>> + (eq_attr "type" "csel")) >>>> + "xgene1_decode1op, xgene1_IXn") >>>> >>>> (define_insn_reservation "xgene1_alu" 1 >>>> (and 
(eq_attr "tune" "xgene1") >>>> (eq_attr "type" "alu_imm,alu_sreg,alu_shift_imm,\ >>>> - alu_ext,adc_reg,csel,logic_imm,\ >>>> + alu_ext,adc_reg,logic_imm,\ >>>> logic_reg,logic_shift_imm,clz,\ >>>> - rbit,shift_reg,adr,mov_reg,\ >>>> - mov_imm,extend")) >>>> - "xgene1_decode1op") >>>> + rbit,adr,mov_reg,shift_imm,\ >>>> + mov_imm,extend,multiple")) >>>> + "xgene1_decode1op, xgene1_IXn") >>>> + >>>> +(define_insn_reservation "xgene1_shift_rotate" 2 >>>> + (and (eq_attr "tune" "xgene1") >>>> + (eq_attr "type" "shift_reg")) >>>> + "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl") >>>> >>>> -(define_insn_reservation "xgene1_simd" 1 >>>> +(define_insn_reservation "xgene1_simd" 2 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "rev")) >>>> - "xgene1_decode1op") >>>> + "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl") >>>> >>>> (define_insn_reservation "xgene1_alus" 1 >>>> (and (eq_attr "tune" "xgene1") >>>> - (eq_attr "type" "alus_imm,alu_sreg,alus_shift_imm,\ >>>> + (eq_attr "type" "alus_imm,alus_sreg,alus_shift_imm,\ >>>> alus_ext,logics_imm,logics_reg,\ >>>> logics_shift_imm")) >>>> - "xgene1_decode1op") >>>> + "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl") >>>> + >>>> +(define_bypass 2 "xgene1_alus" >>>> + "xgene1_alu_cond, xgene1_branch") >>>> >>>> -(define_insn_reservation "xgene1_mul" 6 >>>> +(define_insn_reservation "xgene1_mul32" 4 >>>> (and (eq_attr "tune" "xgene1") >>>> - (eq_attr "type" "mul,mla,smull,umull,smlal,umlal")) >>>> - "xgene1_decode2op") >>>> + (eq_attr "mul32" "yes")) >>>> + "xgene1_decode2op, xgene1_IXB + xgene1_multiply, xgene1_multiply, >>>> nothing, xgene1_IXB_compl") >>>> + >>>> +(define_insn_reservation "xgene1_mul64" 5 >>>> + (and (eq_attr "tune" "xgene1") >>>> + (eq_attr "mul64" "yes")) >>>> + "xgene1_decode2op, xgene1_IXB + xgene1_multiply, xgene1_multiply, >>>> nothing*2, xgene1_IXB_compl") >>>> >>>> (define_insn_reservation "xgene1_div" 34 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "sdiv,udiv")) >>>> - 
"xgene1_decode1op,xgene1_divide*7") >>>> + "xgene1_decode1op, xgene1_IXB + xgene1_divide*7") >>>> >>>> (define_insn_reservation "xgene1_fcmp" 10 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "fcmpd,fcmps,fccmpd,fccmps")) >>>> - "xgene1_decode1op,xgene1_fsu+xgene1_fcmp*3") >>>> + "xgene1_decode1op, xgene1_fsu + xgene1_fcmp*3") >>>> >>>> (define_insn_reservation "xgene1_fcsel" 3 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "fcsel")) >>>> - "xgene1_decode1op,xgene1_fsu") >>>> + "xgene1_decode1op, xgene1_fsu") >>>> + >>>> +(define_insn_reservation "xgene1_bfx" 1 >>>> + (and (eq_attr "tune" "xgene1") >>>> + (eq_attr "type" "bfx")) >>>> + "xgene1_decode1op, xgene1_IXn") >>>> >>>> (define_insn_reservation "xgene1_bfm" 2 >>>> (and (eq_attr "tune" "xgene1") >>>> - (eq_attr "type" "bfm,bfx")) >>>> - "xgene1_decode1op,xgene1_fsu") >>>> + (eq_attr "type" "bfm")) >>>> + "xgene1_decode1op, xgene1_IXB, xgene1_IXB_compl") >>>> >>>> (define_insn_reservation "xgene1_f_rint" 5 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "f_rintd,f_rints")) >>>> - "xgene1_decode1op,xgene1_fsu") >>>> + "xgene1_decode1op, xgene1_fsu") >>>> >>>> (define_insn_reservation "xgene1_f_cvt" 3 >>>> (and (eq_attr "tune" "xgene1") >>>> @@ -225,12 +257,12 @@ >>>> (define_insn_reservation "xgene1_neon_load1" 11 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")) >>>> - "xgene1_decode2op,xgene1_fsu") >>>> + "xgene1_decode2op, xgene1_ld") >>>> >>>> (define_insn_reservation "xgene1_neon_store1" 5 >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")) >>>> - "xgene1_decode2op,xgene1_fsu") >>>> + "xgene1_decode2op, xgene1_st") >>>> >>>> (define_insn_reservation "xgene1_neon_logic" 2 >>>> (and (eq_attr "tune" "xgene1") >>>> @@ -300,6 +332,8 @@ >>>> neon_compare_zero_q,\ >>>> neon_tst,\ >>>> neon_tst_q,\ >>>> + neon_minmax,\ >>>> + neon_minmax_q,\ >>>> ")) >>>> "xgene1_decode1op,xgene1_fsu") >>>> >>>> 
@@ -439,8 +473,10 @@ >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "neon_store1_one_lane,\ >>>> neon_store1_one_lane_q,\ >>>> + neon_stp,\ >>>> + neon_stp_q,\ >>>> ")) >>>> - "xgene1_decode1op") >>>> + "xgene1_decodeIsolated, xgene1_st") >>>> >>>> (define_insn_reservation "xgene1_neon_halve_narrow" 6 >>>> (and (eq_attr "tune" "xgene1") >>>> @@ -499,7 +535,7 @@ >>>> (and (eq_attr "tune" "xgene1") >>>> (eq_attr "type" "neon_load1_all_lanes,\ >>>> ")) >>>> - "xgene1_decode1op") >>>> + "xgene1_decode1op, xgene1_ld") >>>> >>>> (define_insn_reservation "xgene1_neon_fp_recp" 3 >>>> (and (eq_attr "tune" "xgene1") >>>> >
signature.asc
Description: Message signed with OpenPGP using GPGMail