Hi Richard, Here's the split-off complex add patch.
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. Also checked with armv8-a+sve2+fp16 with no issues.

Note that due to a mid-end limitation, SLP for SVE currently fails for some permutes. The tests for these are marked as XFAIL. Matching tests for these are in the mid-end patches.

Ok for master? The rest will be respun when I have a working tree again and can test them again.

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (cadd<rot><mode>3): New.
	* config/aarch64/iterators.md (SVE2_INT_CADD_OP): New.
	* config/aarch64/aarch64-sve.md (cadd<rot><mode>3): New.
	* config/aarch64/aarch64-sve2.md (cadd<rot><mode>3): New.

The 12/14/2020 16:11, Richard Sandiford wrote:
> Tamar Christina <tamar.christ...@arm.com> writes:
> > Hi Richard,
> >
> > Do you object to me splitting off complex add and addressing your remaining
> > feedback later when the rewrite of mul and fma are done?
>
> No, sounds good to me.
>
> Thanks,
> Richard

--
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 68baf416045178b0ebcfeb8de2d201f625f1c317..05d18f8bd3ac09c56c82dc73cff855315eb302b7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -449,6 +449,14 @@ (define_insn "aarch64_fcadd<rot><mode>" [(set_attr "type" "neon_fcadd")] ) +(define_expand "cadd<rot><mode>3" + [(set (match_operand:VHSDF 0 "register_operand") + (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + FCADD))] + "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" +) + (define_insn "aarch64_fcmla<rot><mode>" [(set (match_operand:VHSDF 0 "register_operand" "=w") (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0") diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 6359c40bdecda6c126bd70bef66561dd1da44dc9..6a5194f54f9c3d7b985890df523ceb4221b0cc63 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -5480,6 +5480,20 @@ (define_expand "@cond_<optab><mode>" "TARGET_SVE" ) +;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer +(define_expand "@cadd<rot><mode>3" + [(set (match_operand:SVE_FULL_F 0 "register_operand") + (unspec:SVE_FULL_F + [(match_dup 3) + (const_int SVE_RELAXED_GP) + (match_operand:SVE_FULL_F 1 "register_operand") + (match_operand:SVE_FULL_F 2 "register_operand")] + SVE_COND_FCADD))] + "TARGET_SVE" +{ + operands[3] = aarch64_ptrue_reg (<VPRED>mode); +}) + ;; Predicated FCADD, merging with the first input. 
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 772c35079c9441448534471fba4dba622322b8fc..1897ddf69c34496247e40ba36d97b93a9a9d7670 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1799,6 +1799,16 @@ (define_insn "@aarch64_sve_<optab><mode>" [(set_attr "movprfx" "*,yes")] ) +;; unpredicated optab pattern for auto-vectorizer +(define_expand "cadd<rot><mode>3" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (unspec:SVE_FULL_I + [(match_operand:SVE_FULL_I 1 "register_operand") + (match_operand:SVE_FULL_I 2 "register_operand")] + SVE2_INT_CADD_OP))] + "TARGET_SVE2" +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Complex ternary operations ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index fb1426b7752890848cb49722ef7442d96cb1408b..b8ee4220603ce6da4e8044f61042d98a7e60b17d 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2598,6 +2598,10 @@ (define_int_iterator SVE2_INT_CMLA [UNSPEC_CMLA UNSPEC_SQRDCMLAH180 UNSPEC_SQRDCMLAH270]) +;; Same as SVE2_INT_CADD but exclude the saturating instructions +(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90 + UNSPEC_CADD270]) + (define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT UNSPEC_CDOT90 UNSPEC_CDOT180