Tamar Christina <tamar.christ...@arm.com> writes: >> -----Original Message----- >> From: Richard Sandiford <richard.sandif...@arm.com> >> Sent: Friday, December 17, 2021 4:49 PM >> To: Tamar Christina <tamar.christ...@arm.com> >> Cc: gcc-patches@gcc.gnu.org; nd <n...@arm.com>; Richard Earnshaw >> <richard.earns...@arm.com>; Marcus Shawcroft >> <marcus.shawcr...@arm.com>; Kyrylo Tkachov <kyrylo.tkac...@arm.com> >> Subject: Re: [2/3 PATCH]AArch64 use canonical ordering for complex mul, >> fma and fms >> >> Richard Sandiford <richard.sandif...@arm.com> writes: >> > Tamar Christina <tamar.christ...@arm.com> writes: >> >> Hi All, >> >> >> >> After the first patch in the series this updates the optabs to expect >> >> the canonical sequence. >> >> >> >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. >> >> >> >> Ok for master? and backport along with the first patch? >> >> >> >> Thanks, >> >> Tamar >> >> >> >> gcc/ChangeLog: >> >> >> >> PR tree-optimization/102819 >> >> PR tree-optimization/103169 >> >> * config/aarch64/aarch64-simd.md >> (cml<fcmac1><conj_op><mode>4, >> >> cmul<conj_op><mode>3): Use canonical order. >> >> * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4, >> >> cmul<conj_op><mode>3): Likewise. >> >> >> >> --- inline copy of patch -- >> >> diff --git a/gcc/config/aarch64/aarch64-simd.md >> >> b/gcc/config/aarch64/aarch64-simd.md >> >> index >> >> >> f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9 >> c >> >> fb5649f9b0e73 100644 >> >> --- a/gcc/config/aarch64/aarch64-simd.md >> >> +++ b/gcc/config/aarch64/aarch64-simd.md >> >> @@ -556,17 +556,17 @@ (define_insn >> "aarch64_fcmlaq_lane<rot><mode>" >> >> ;; remainder. Because of this, expand early. >> >> (define_expand "cml<fcmac1><conj_op><mode>4" >> >> [(set (match_operand:VHSDF 0 "register_operand") >> >> - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") >> >> - (unspec:VHSDF [(match_operand:VHSDF 2 >> "register_operand") >> >> - (match_operand:VHSDF 3 >> "register_operand")] >> >> - FCMLA_OP)))] >> >> + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 >> "register_operand") >> >> + (match_operand:VHSDF 2 >> "register_operand")] >> >> + FCMLA_OP) >> >> + (match_operand:VHSDF 3 "register_operand")))] >> >> "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" >> >> { >> >> rtx tmp = gen_reg_rtx (<MODE>mode); >> >> - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], >> >> - operands[3], operands[2])); >> >> + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], >> >> + operands[1], operands[2])); >> >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, >> >> - operands[3], operands[2])); >> >> + operands[1], operands[2])); >> >> DONE; >> >> }) >> >> >> >> @@ -583,9 +583,9 @@ (define_expand "cmul<conj_op><mode>3" >> >> rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); >> >> rtx res1 = gen_reg_rtx (<MODE>mode); >> >> emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp, >> >> - operands[2], operands[1])); >> >> + operands[1], operands[2])); >> >> emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1, >> >> - operands[2], operands[1])); >> >> + operands[1], operands[2])); >> > >> > This doesn't look right. Going from the documentation, patch 1 isn't >> > changing the operand order for CMUL: the conjugated operand (if there >> > is one) is still operand 2. The FCMLA sequences use the opposite >> > order, where the conjugated operand (if there is one) is operand 1. >> > So I think >> >> I meant “the first multiplication operand” rather than “operand 1” here. >> >> > the reversal here is still needed. >> > >> > Same for the multiplication operands in CML* above. > > I did actually change the order in patch 1, but didn't update the docs.. > That was done because I followed the SLP order again, but now I've updated > them to do what the docs say. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? and backport along with the first patch?
OK, thanks. Richard > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/102819 > PR tree-optimization/103169 > * config/aarch64/aarch64-simd.md (cml<fcmac1><conj_op><mode>4): Use > canonical order. > * config/aarch64/aarch64-sve.md (cml<fcmac1><conj_op><mode>4): Likewise. > > --- inline copy of patch --- > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..9e41610fba85862ef7675bea1e5731b14cab59ce > 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane<rot><mode>" > ;; remainder. Because of this, expand early. > (define_expand "cml<fcmac1><conj_op><mode>4" > [(set (match_operand:VHSDF 0 "register_operand") > - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") > - (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") > - (match_operand:VHSDF 3 "register_operand")] > - FCMLA_OP)))] > + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") > + (match_operand:VHSDF 2 "register_operand")] > + FCMLA_OP) > + (match_operand:VHSDF 3 "register_operand")))] > "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" > { > rtx tmp = gen_reg_rtx (<MODE>mode); > - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], > - operands[3], operands[2])); > + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], > + operands[2], operands[1])); > emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, > - operands[3], operands[2])); > + operands[2], operands[1])); > DONE; > }) > > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index > 9ef968840c20a3049901b3f8a919cf27ded1da3e..9ed19017c480b88779e9e3b08c0e031be60a8c12 > 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -7278,11 +7278,11 @@ (define_expand "cml<fcmac1><conj_op><mode>4" > rtx tmp = gen_reg_rtx (<MODE>mode); > emit_insn > (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], > - operands[3], operands[2], > - operands[1], operands[5])); > + operands[2], operands[1], > + operands[3], operands[5])); > emit_insn > (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], > - operands[3], operands[2], > + operands[2], operands[1], > tmp, operands[5])); > DONE; > })