[AMD Official Use Only - General] Hi,
Thank you for fixing this. > -----Original Message----- > From: Alexander Monakov <amona...@ispras.ru> > Sent: Tuesday, November 1, 2022 9:57 PM > To: gcc-patches@gcc.gnu.org > Cc: Jan Hubička <honza.hubi...@gmail.com>; Joshi, Tejas Sanjay > <tejassanjay.jo...@amd.com>; Kumar, Venkataramanan > <venkataramanan.ku...@amd.com>; Alexander Monakov > <amona...@ispras.ru> > Subject: [PATCH 2/2] i386: correct x87&SSE multiplication modeling in > znver.md > > Caution: This message originated from an External Source. Use proper > caution when opening attachments, clicking links, or responding. > > > All multiplication instructions are fully pipelined, except AVX256 > instructions on Zen 1, which issue over two cycles on a 128-bit unit. > Correct the model accordingly to reduce combinatorial explosion in > automaton tables. > > Top znver table sizes in insn-automata.o: > > Before: > > 30056 r znver1_fp_min_issue_delay > 120224 r znver1_fp_transitions > > After: > > 6720 r znver1_fp_min_issue_delay > 53760 r znver1_fp_transitions > > gcc/ChangeLog: > > PR target/87832 > * config/i386/znver.md: (znver1_fp_op_mul): Correct cycles in > the reservation. > (znver1_fp_op_mul_load): Ditto. > (znver1_mmx_mul): Ditto. > (znver1_mmx_load): Ditto. > (znver1_ssemul_ss_ps): Ditto. > (znver1_ssemul_ss_ps_load): Ditto. > (znver1_ssemul_avx256_ps): Ditto. > (znver1_ssemul_avx256_ps_load): Ditto. > (znver1_ssemul_sd_pd): Ditto. > (znver1_ssemul_sd_pd_load): Ditto. > (znver2_ssemul_sd_pd): Ditto. > (znver2_ssemul_sd_pd_load): Ditto. > (znver1_ssemul_avx256_pd): Ditto. > (znver1_ssemul_avx256_pd_load): Ditto. > (znver1_sseimul): Ditto. > (znver1_sseimul_avx256): Ditto. > (znver1_sseimul_load): Ditto. > (znver1_sseimul_avx256_load): Ditto. > (znver1_sseimul_di): Ditto. > (znver1_sseimul_load_di): Ditto. 
> --- > gcc/config/i386/znver.md | 40 ++++++++++++++++++++-------------------- > 1 file changed, 20 insertions(+), 20 deletions(-) > > diff --git a/gcc/config/i386/znver.md b/gcc/config/i386/znver.md index > c52f8b532..882f250f1 100644 > --- a/gcc/config/i386/znver.md > +++ b/gcc/config/i386/znver.md > @@ -573,13 +573,13 @@ (define_insn_reservation "znver1_fp_op_mul" 5 > (and (eq_attr "cpu" "znver1,znver2,znver3") > (and (eq_attr "type" "fop,fmul") > (eq_attr "memory" "none"))) > - "znver1-direct,znver1-fp0*5") > + "znver1-direct,znver1-fp0") > > (define_insn_reservation "znver1_fp_op_mul_load" 12 > (and (eq_attr "cpu" "znver1,znver2,znver3") > (and (eq_attr "type" "fop,fmul") > (eq_attr "memory" "load"))) > - "znver1-direct,znver1-load,znver1-fp0*5") > + "znver1-direct,znver1-load,znver1-fp0") > > (define_insn_reservation "znver1_fp_op_imul_load" 16 > (and (eq_attr "cpu" "znver1,znver2,znver3") @@ > -684,13 > +684,13 @@ (define_insn_reservation "znver1_mmx_mul" 3 > (and (eq_attr "cpu" "znver1,znver2,znver3") > (and (eq_attr "type" "mmxmul") > (eq_attr "memory" "none"))) > - "znver1-direct,znver1-fp0*3") > + "znver1-direct,znver1-fp0") > > (define_insn_reservation "znver1_mmx_load" 10 > (and (eq_attr "cpu" "znver1,znver2,znver3") > (and (eq_attr "type" "mmxmul") > (eq_attr "memory" "load"))) > - "znver1-direct,znver1-load,znver1-fp0*3") > + "znver1-direct,znver1-load,znver1-fp0") > > ;; TODO > (define_insn_reservation "znver1_avx256_log" 1 @@ -1161,7 +1161,7 > @@ (define_insn_reservation "znver1_ssemul_ss_ps" 3 > (eq_attr "mode" > "V8SF,V4SF,SF,V4DF,V2DF,DF"))) > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "none"))) > - "znver1-direct,(znver1-fp0|znver1-fp1)*3") > + "znver1-direct,znver1-fp0|znver1-fp1") > > (define_insn_reservation "znver1_ssemul_ss_ps_load" 10 > (and (ior (and (eq_attr "cpu" "znver1") @@ -1172,47 > +1172,47 @@ (define_insn_reservation "znver1_ssemul_ss_ps_load" 10 > (eq_attr "mode" > "V8SF,V4SF,SF"))) > (and (eq_attr "type" "ssemul") > 
(eq_attr "memory" "load"))) > - > "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3") > + > + "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") > > (define_insn_reservation "znver1_ssemul_avx256_ps" 3 > (and (eq_attr "cpu" "znver1") > (and (eq_attr "mode" "V8SF") > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "none")))) > - "znver1-double,(znver1-fp0|znver1-fp1)*3") > + "znver1-double,znver1-fp0*2|znver1-fp1*2") > > (define_insn_reservation "znver1_ssemul_avx256_ps_load" 10 > (and (eq_attr "cpu" "znver1") > (and (eq_attr "mode" "V8SF") > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "load")))) > - > "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*3") > + > + "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2") > > (define_insn_reservation "znver1_ssemul_sd_pd" 4 > (and (eq_attr "cpu" "znver1") > (and (eq_attr "mode" "V2DF,DF") > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "none")))) > - "znver1-direct,(znver1-fp0|znver1-fp1)*4") > + "znver1-direct,znver1-fp0|znver1-fp1") > > (define_insn_reservation "znver1_ssemul_sd_pd_load" 11 > (and (eq_attr "cpu" "znver1") > (and (eq_attr "mode" "V2DF,DF") > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "load")))) > - > "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4") > + > + "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") > > (define_insn_reservation "znver2_ssemul_sd_pd" 3 > (and (eq_attr "cpu" "znver2,znver3") > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "none"))) > - "znver1-direct,(znver1-fp0|znver1-fp1)*3") > + "znver1-direct,znver1-fp0|znver1-fp1") > > (define_insn_reservation "znver2_ssemul_sd_pd_load" 10 > (and (eq_attr "cpu" "znver2,znver3") > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "load"))) > - > "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3") > + > + "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") > > > (define_insn_reservation "znver1_ssemul_avx256_pd" 5 @@ -1220,14 > +1220,14 @@ (define_insn_reservation "znver1_ssemul_avx256_pd" 5 > (and 
(eq_attr "mode" "V4DF") > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "none")))) > - "znver1-double,(znver1-fp0|znver1-fp1)*4") > + "znver1-double,znver1-fp0*2|znver1-fp1*2") Do we need to include a "znver1" CPU check in this reservation? > > (define_insn_reservation "znver1_ssemul_avx256_pd_load" 12 > (and (eq_attr "cpu" "znver1") > (and (eq_attr "mode" "V4DF") > (and (eq_attr "type" "ssemul") > (eq_attr "memory" "load")))) > - > "znver1-double,znver1-load,(znver1-fp0|znver1-fp1)*4") > + > + "znver1-double,znver1-load,znver1-fp0*2|znver1-fp1*2") > > ;;SSE imul > (define_insn_reservation "znver1_sseimul" 3 @@ -1239,14 +1239,14 @@ > (define_insn_reservation "znver1_sseimul" 3 > (eq_attr "mode" "TI,OI"))) > (and (eq_attr "type" "sseimul") > (eq_attr "memory" "none"))) > - "znver1-direct,znver1-fp0*3") > + "znver1-direct,znver1-fp0") > > (define_insn_reservation "znver1_sseimul_avx256" 4 > (and (eq_attr "cpu" "znver1,znver2,znver3") > (and (eq_attr "mode" "OI") > (and (eq_attr "type" "sseimul") > (eq_attr "memory" "none")))) > - "znver1-double,znver1-fp0*4") > + "znver1-double,znver1-fp0*2") The native data path of znver1 is 128 bits wide, whereas znver2/3 have 256-bit paths, so we need to split this into two reservations: one for znver1 and the other for znver2/3. > > (define_insn_reservation "znver1_sseimul_load" 10 > (and (ior (and (eq_attr "cpu" "znver1") @@ -1257,28 > +1257,28 @@ (define_insn_reservation "znver1_sseimul_load" 10 > (eq_attr "mode" "TI,OI"))) > (and (eq_attr "type" "sseimul") > (eq_attr "memory" "load"))) > - "znver1-direct,znver1-load,znver1-fp0*3") > + "znver1-direct,znver1-load,znver1-fp0") > > (define_insn_reservation "znver1_sseimul_avx256_load" 11 > (and (eq_attr "cpu" "znver1,znver2,znver3") > (and (eq_attr "mode" "OI") > (and (eq_attr "type" "sseimul") > (eq_attr "memory" "load")))) > - "znver1-double,znver1-load,znver1-fp0*4") > + "znver1-double,znver1-load,znver1-fp0*2") We need to split this into two reservations as well: one for znver1 and the other for znver2/3. 
> > (define_insn_reservation "znver1_sseimul_di" 3 > (and (eq_attr "cpu" "znver1,znver2,znver3") > (and (eq_attr "mode" "DI") > (and (eq_attr "memory" "none") > (eq_attr "type" "sseimul")))) > - "znver1-direct,znver1-fp0*3") > + "znver1-direct,znver1-fp0") > > (define_insn_reservation "znver1_sseimul_load_di" 10 > (and (eq_attr "cpu" "znver1,znver2,znver3") > (and (eq_attr "mode" "DI") > (and (eq_attr "type" "sseimul") > (eq_attr "memory" "load")))) > - "znver1-direct,znver1-load,znver1-fp0*3") > + "znver1-direct,znver1-load,znver1-fp0") > > ;; SSE compares > (define_insn_reservation "znver1_sse_cmp" 1 > -- > 2.37.2 The patch looks good. Regards, Venkat.