ping
> -----Original Message----- > From: Tamar Christina > Sent: Monday, December 20, 2021 4:21 PM > To: Richard Sandiford <richard.sandif...@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <n...@arm.com>; Richard Earnshaw > <richard.earns...@arm.com>; Marcus Shawcroft > <marcus.shawcr...@arm.com>; Kyrylo Tkachov <kyrylo.tkac...@arm.com> > Subject: RE: [2/3 PATCH]AArch64 use canonical ordering for complex > mul, fma and fms > > > > > -----Original Message----- > > From: Richard Sandiford <richard.sandif...@arm.com> > > Sent: Friday, December 17, 2021 4:49 PM > > To: Tamar Christina <tamar.christ...@arm.com> > > Cc: gcc-patches@gcc.gnu.org; nd <n...@arm.com>; Richard Earnshaw > > <richard.earns...@arm.com>; Marcus Shawcroft > > <marcus.shawcr...@arm.com>; Kyrylo Tkachov > <kyrylo.tkac...@arm.com> > > Subject: Re: [2/3 PATCH]AArch64 use canonical ordering for complex > > mul, fma and fms > > > > Richard Sandiford <richard.sandif...@arm.com> writes: > > > Tamar Christina <tamar.christ...@arm.com> writes: > > >> Hi All, > > >> > > >> After the first patch in the series this updates the optabs to > > >> expect the canonical sequence. > > >> > > >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > >> > > >> Ok for master? and backport along with the first patch? > > >> > > >> Thanks, > > >> Tamar > > >> > > >> gcc/ChangeLog: > > >> > > >> PR tree-optimization/102819 > > >> PR tree-optimization/103169 > > >> * config/aarch64/aarch64-simd.md > > (cml<fcmac1><conj_op><mode>4, > > >> cmul<conj_op><mode>3): Use canonical order. > > >> * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4, > > >> cmul<conj_op><mode>3): Likewise. > > >> > > >> --- inline copy of patch -- > > >> diff --git a/gcc/config/aarch64/aarch64-simd.md > > >> b/gcc/config/aarch64/aarch64-simd.md > > >> index > > >> > > > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9 > > c > > >> fb5649f9b0e73 100644 > > >> --- a/gcc/config/aarch64/aarch64-simd.md > > >> +++ b/gcc/config/aarch64/aarch64-simd.md > > >> @@ -556,17 +556,17 @@ (define_insn > > "aarch64_fcmlaq_lane<rot><mode>" > > >> ;; remainder. Because of this, expand early. > > >> (define_expand "cml<fcmac1><conj_op><mode>4" > > >> [(set (match_operand:VHSDF 0 "register_operand") > > >> - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > > >> - (unspec:VHSDF [(match_operand:VHSDF 2 > > "register_operand") > > >> - (match_operand:VHSDF 3 > > "register_operand")] > > >> - FCMLA_OP)))] > > >> + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 > > "register_operand") > > >> + (match_operand:VHSDF 2 > > "register_operand")] > > >> + FCMLA_OP) > > >> + (match_operand:VHSDF 3 "register_operand")))] > > >> "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > > >> { > > >> rtx tmp = gen_reg_rtx (<MODE>mode); > > >> - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > > >> - operands[3], > > >> operands[2])); > > >> + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > > >> + operands[1], > > >> operands[2])); > > >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > > >> - operands[3], > > >> operands[2])); > > >> + operands[1], > > >> operands[2])); > > >> DONE; > > >> }) > > >> > > >> @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" > > >> rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); > > >> rtx res1 = gen_reg_rtx (<MODE>mode); > > >> emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, > > >> - operands[2], > > >> operands[1])); > > >> + operands[1], > > >> operands[2])); > > >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1, > > >> - operands[2], > > >> operands[1])); > > >> + operands[1], > > >> operands[2])); > > > > > > This doesn't look right. Going from the documentation, patch 1 > > > isn't changing the operand order for CMUL: the conjugated operand > > > (if there is one) is still operand 2. The FCMLA sequences use the > > > opposite order, where the conjugated operand (if there is one) is > operand 1. > > > So I think > > > > I meant “the first multiplication operand” rather than “operand 1” here. > > > > > the reversal here is still needed. > > > > > > Same for the multiplication operands in CML* above. > > I did actually change the order in patch 1, but didn't update the docs.. > That was done because I followed the SLP order again, but now I've > updated them to do what the docs say. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? and backport along with the first patch? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/102819 > PR tree-optimization/103169 > * config/aarch64/aarch64-simd.md > (cml<fcmac1><conj_op><mode>4): Use > canonical order. > * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4): > Likewise. > > --- inline copy of patch --- > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..9e41610fba85862ef7675bea1 > e5731b14cab59ce 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane<rot><mode>" > ;; remainder. Because of this, expand early. > (define_expand "cml<fcmac1><conj_op><mode>4" > [(set (match_operand:VHSDF 0 "register_operand") > - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > - (unspec:VHSDF [(match_operand:VHSDF 2 > "register_operand") > - (match_operand:VHSDF 3 > "register_operand")] > - FCMLA_OP)))] > + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 > "register_operand") > + (match_operand:VHSDF 2 > "register_operand")] > + FCMLA_OP) > + (match_operand:VHSDF 3 "register_operand")))] > "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > { > rtx tmp = gen_reg_rtx (<MODE>mode); > - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > - operands[3], operands[2])); > + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > + operands[2], operands[1])); > emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > - operands[3], operands[2])); > + operands[2], operands[1])); > DONE; > }) > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index > 9ef968840c20a3049901b3f8a919cf27ded1da3e..9ed19017c480b88779e9e3b08 > c0e031be60a8c12 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -7278,11 +7278,11 @@ (define_expand "cml<fcmac1><conj_op><mode>4" > rtx tmp = gen_reg_rtx (<MODE>mode); > emit_insn > (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], > - operands[3], operands[2], > - operands[1], operands[5])); > + operands[2], operands[1], > + operands[3], operands[5])); > emit_insn > (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], > - operands[3], operands[2], > + operands[2], operands[1], > tmp, operands[5])); > DONE; > })