https://gcc.gnu.org/g:8da567fce3e3f89c63098280cb376f980f206906
commit r16-6383-g8da567fce3e3f89c63098280cb376f980f206906
Author: Claudio Bantaloukas <[email protected]>
Date:   Wed Dec 24 11:41:26 2025 +0000

    aarch64: add Multi-vector 8-bit floating-point multiply-add long

    This patch adds support for the following intrinsics when sme-f8f16 is
    enabled:
    * svmla_lane_za16[_mf8]_vg2x1_fpm
    * svmla_lane_za16[_mf8]_vg2x2_fpm
    * svmla_lane_za16[_mf8]_vg2x4_fpm
    * svmla_za16[_mf8]_vg2x1_fpm
    * svmla[_single]_za16[_mf8]_vg2x2_fpm
    * svmla[_single]_za16[_mf8]_vg2x4_fpm
    * svmla_za16[_mf8]_vg2x2_fpm
    * svmla_za16[_mf8]_vg2x4_fpm

    This patch adds support for the following intrinsics when sme-f8f32 is
    enabled:
    * svmla_lane_za32[_mf8]_vg4x1_fpm
    * svmla_lane_za32[_mf8]_vg4x2_fpm
    * svmla_lane_za32[_mf8]_vg4x4_fpm
    * svmla_za32[_mf8]_vg4x1_fpm
    * svmla[_single]_za32[_mf8]_vg4x2_fpm
    * svmla[_single]_za32[_mf8]_vg4x4_fpm
    * svmla_za32[_mf8]_vg4x2_fpm
    * svmla_za32[_mf8]_vg4x4_fpm

    Asm tests for the 32-bit versions follow the blueprint set in
    mla_lane_za32_u8_vg4x1.c, mla_za32_u8_vg4x1.c and similar; the 16-bit
    versions follow similar patterns, modulo differences in the allowed
    offsets.

    gcc:

        * config/aarch64/aarch64-sme.md
        (@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>):
        Add new define_insn.
        (*aarch64_sme_<optab><VNx8HI_ONLY:mode><SME_ZA_FP8_x24:mode>_plus,
        *aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_FP8_x24:mode>_plus,
        @aarch64_sme_<optab><SME_ZA_F8F16_32:mode><VNx16QI_ONLY:mode>,
        *aarch64_sme_<optab><VNx8HI_ONLY:mode><VNx16QI_ONLY:mode>_plus,
        *aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>_plus,
        @aarch64_sme_single_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>,
        *aarch64_sme_single_<optab><VNx8HI_ONLY:mode><SME_ZA_FP8_x24:mode>_plus,
        *aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_FP8_x24:mode>_plus,
        @aarch64_sme_lane_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x124:mode>,
        *aarch64_sme_lane_<optab><VNx8HI_ONLY:mode><SME_ZA_FP8_x124:mode>,
        *aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_FP8_x124:mode>):
        Likewise.
        * config/aarch64/aarch64-sve-builtins-shapes.cc
        (struct binary_za_slice_lane_base): Support fpm argument.
        (struct binary_za_slice_opt_single_base): Likewise.
        * config/aarch64/aarch64-sve-builtins-sme.cc (svmla_za): Extend
        for fp8.
        (svmla_lane_za): Likewise.
        * config/aarch64/aarch64-sve-builtins-sme.def (svmla_lane): Add new
        DEF_SME_ZA_FUNCTION_GS_FPM entries.
        (svmla): Likewise.
        * config/aarch64/iterators.md (SME_ZA_F8F16_32): Add new mode
        iterator.
        (SME_ZA_FP8_x24, SME_ZA_FP8_x124): Likewise.
        (UNSPEC_SME_FMLAL): Add new unspec.
        (za16_offset_range): Add new mode_attr.
        (za16_32_long): Likewise.
        (za16_32_last_offset): Likewise.
        (SME_FP8_TERNARY_SLICE): Add new iterator.
        (optab): Add entry for UNSPEC_SME_FMLAL.

    gcc/testsuite:

        * gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h (TEST_ZA_X1,
        TEST_ZA_XN, TEST_ZA_SINGLE, TEST_ZA_SINGLE_Z15, TEST_ZA_LANE,
        TEST_ZA_LANE_Z15): Add fpm0 parameter.
        * gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c:
        Add tests for variants accepting fpm.
        * gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x1.c: New test.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x2.c: New test.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x4.c: New test.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x1.c: New test.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x2.c: New test.
        * gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x4.c: New test.
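    As a usage sketch (illustrative only, not part of this commit): callers
    must be in streaming mode with ZA available, and each new intrinsic takes
    a trailing fpm_t argument that the compiler moves into FPMR (the
    "msr fpmr" instructions in the tests below) before the FMLAL/FMLALL
    instruction.  The function and parameter names and the target string in
    the following fragment are assumptions, not taken from the commit.

    #include <arm_sme.h>

    #pragma GCC target "+sme+sme-f8f16+sme-f8f32"

    void
    fp8_mla (uint32_t slice, svmfloat8x2_t zn, svmfloat8x2_t zm,
             svmfloat8_t zk, fpm_t fpm)
      __arm_streaming __arm_inout("za")
    {
      /* Multi-vector form: widen the FP8 products to FP16 and accumulate
         into consecutive ZA.H slices selected by 'slice' (vgx2 grouping,
         FMLAL).  */
      svmla_za16_mf8_vg2x2_fpm (slice, zn, zm, fpm);

      /* Single-vector form: zk is the multiplier for both vectors of zn.  */
      svmla_single_za16_mf8_vg2x2_fpm (slice, zn, zk, fpm);

      /* Indexed form, FP8 to FP32: multiply by lane 5 of zk and accumulate
         into four consecutive ZA.S slices (FMLALL).  */
      svmla_lane_za32_mf8_vg4x1_fpm (slice, zk, zk, 5, fpm);
    }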
* gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x1.c: New test. * gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x4.c: New test. * gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x1.c: New test. * gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x2.c: New test. * gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x4.c: New test. Diff: --- gcc/config/aarch64/aarch64-sme.md | 233 +++++++++++++++++ gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 4 +- gcc/config/aarch64/aarch64-sve-builtins-sme.cc | 5 +- gcc/config/aarch64/aarch64-sve-builtins-sme.def | 8 + gcc/config/aarch64/iterators.md | 19 ++ .../sme2/acle-asm/mla_lane_za16_mf8_vg2x1.c | 167 ++++++++++++ .../sme2/acle-asm/mla_lane_za16_mf8_vg2x2.c | 136 ++++++++++ .../sme2/acle-asm/mla_lane_za16_mf8_vg2x4.c | 142 ++++++++++ .../sme2/acle-asm/mla_lane_za32_mf8_vg4x1.c | 169 ++++++++++++ .../sme2/acle-asm/mla_lane_za32_mf8_vg4x2.c | 137 ++++++++++ .../sme2/acle-asm/mla_lane_za32_mf8_vg4x4.c | 143 ++++++++++ .../aarch64/sme2/acle-asm/mla_za16_mf8_vg2x1.c | 167 ++++++++++++ .../aarch64/sme2/acle-asm/mla_za16_mf8_vg2x2.c | 285 ++++++++++++++++++++ .../aarch64/sme2/acle-asm/mla_za16_mf8_vg2x4.c | 287 ++++++++++++++++++++ .../aarch64/sme2/acle-asm/mla_za32_mf8_vg4x1.c | 167 ++++++++++++ .../aarch64/sme2/acle-asm/mla_za32_mf8_vg4x2.c | 277 ++++++++++++++++++++ .../aarch64/sme2/acle-asm/mla_za32_mf8_vg4x4.c | 289 +++++++++++++++++++++ .../aarch64/sme2/acle-asm/test_sme2_acle.h | 12 +- .../sve/acle/general-c/binary_za_slice_lane_1.c | 14 + .../acle/general-c/binary_za_slice_opt_single_1.c | 16 ++ 20 files changed, 2667 insertions(+), 10 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 632ef1e47744..e93f83b39834 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -1999,6 +1999,9 @@ ;; - BFMLSL (SME2) ;; - FMLAL (SME2) ;; - FMLSL (SME2) +;; - FMLAL (multiple and indexed vector, FP8 to FP16 and FP8 to FP32, SME2) +;; - FMLAL (multiple and single vector, FP8 to FP16 and FP8 to FP32, SME2) +;; - FMLAL (multiple vectors, FP8 to FP16 and FP8 to FP32, SME2) ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>" @@ -2129,6 +2132,236 @@ } ) +;; svmla_za16[_mf8]_vg2x2_fpm, svmla_za16[_mf8]_vg2x4_fpm +;; svmla_za32[_mf8]_vg4x2_fpm, svmla_za32[_mf8]_vg4x4_fpm +(define_insn "@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_FP8_x24 1 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_FP8_x24 2 "aligned_register_operand" "Uw<vector_count>") + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING" + "<optab><SME_ZA_F8F16_32:za16_32_long>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, 0:<SME_ZA_F8F16_32:za16_32_last_offset>, vgx<vector_count>], %1, %2" +) + +;; svmla_za16[_mf8]_vg2x2_fpm, svmla_za16[_mf8]_vg2x4_fpm (slice variable + offset) +(define_insn "*aarch64_sme_<optab><VNx8HI_ONLY:mode><SME_ZA_FP8_x24:mode>_plus" + [(set (reg:VNx8HI_ONLY ZA_REGNUM) + (unspec:VNx8HI_ONLY + [(reg:VNx8HI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za16_offset_range>_operand")) + 
(match_operand:SME_ZA_FP8_x24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_FP8_x24 3 "aligned_register_operand" "Uw<vector_count>") + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING_SME_F8F16" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 1); + return "<optab>\tza.h[%w0, %1:%4, vgx<vector_count>], %2, %3"; + } +) + +;; svmla_za32[_mf8]_vg4x2_fpm, svmla_za32[_mf8]_vg4x4_fpm (slice variable + offset) +(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_FP8_x24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_FP8_x24 2 "aligned_register_operand" "Uw<vector_count>") + (match_operand:SME_ZA_FP8_x24 3 "aligned_register_operand" "Uw<vector_count>") + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING_SME_F8F32" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 3); + return "<optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3"; + } +) + +;; svmla_za16[_mf8]_vg2x1_fpm, svmla_za32[_mf8]_vg4x1_fpm +(define_insn "@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><VNx16QI_ONLY:mode>" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:VNx16QI_ONLY 1 "register_operand" "w") + (match_operand:VNx16QI_ONLY 2 "register_operand" "x") + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING" + "<optab><SME_ZA_F8F16_32:za16_32_long>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, 0:<SME_ZA_F8F16_32:za16_32_last_offset><vg_modifier>], %1.b, %2.b" +) + +;; svmla_za16[_mf8]_vg2x1_fpm (slice variable + offset) +(define_insn "*aarch64_sme_<optab><VNx8HI_ONLY:mode><VNx16QI_ONLY:mode>_plus" + [(set (reg:VNx8HI_ONLY ZA_REGNUM) + (unspec:VNx8HI_ONLY + [(reg:VNx8HI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<VNx16QI_ONLY:za32_offset_range>_operand")) + (match_operand:VNx16QI_ONLY 2 "register_operand" "w") + (match_operand:VNx16QI_ONLY 3 "register_operand" "x") + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING_SME_F8F16" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 1); + return "<optab>\tza.h[%w0, %1:%4<vg_modifier>], %2.b, %3.b"; + } +) + +;; svmla_za32[_mf8]_vg4x1_fpm (slice variable + offset) +(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:VNx16QI_ONLY 2 "register_operand" "w") + (match_operand:VNx16QI_ONLY 3 "register_operand" "x") + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING_SME_F8F32" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 3); + return "<optab>l\tza.s[%w0, %1:%4<vg_modifier>], %2.b, %3.b"; + } +) + +;; svmla[_single]_za16[_mf8]_vg2x2_fpm, svmla[_single]_za16[_mf8]_vg2x4_fpm, +;; svmla[_single]_za32[_mf8]_vg4x2_fpm, svmla[_single]_za32[_mf8]_vg4x4_fpm +(define_insn "@aarch64_sme_single_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x24:mode>" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI 
SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_FP8_x24 1 "register_operand" "w") + (vec_duplicate:SME_ZA_FP8_x24 + (match_operand:<SME_ZA_FP8_x24:VSINGLE> 2 "register_operand" "x")) + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING" + "<optab><SME_ZA_F8F16_32:za16_32_long>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, 0:<SME_ZA_F8F16_32:za16_32_last_offset>, vgx<vector_count>], %1, %2.b" +) + +;; svmla[_single]_za16[_mf8]_vg2x2_fpm, svmla[_single]_za16[_mf8]_vg2x4_fpm (slice variable + offset) +(define_insn "*aarch64_sme_single_<optab><VNx8HI_ONLY:mode><SME_ZA_FP8_x24:mode>_plus" + [(set (reg:VNx8HI_ONLY ZA_REGNUM) + (unspec:VNx8HI_ONLY + [(reg:VNx8HI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za16_offset_range>_operand")) + (match_operand:SME_ZA_FP8_x24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_FP8_x24 + (match_operand:<SME_ZA_FP8_x24:VSINGLE> 3 "register_operand" "x")) + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING_SME_F8F16" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 1); + return "<optab>\tza.h[%w0, %1:%4, vgx<vector_count>], %2, %3.b"; + } +) + +;; svmla[_single]_za32[_mf8]_vg4x2_fpm, svmla[_single]_za32[_mf8]_vg4x4_fpm (slice variable + offset) +(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_FP8_x24:mode>_plus" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_FP8_x24 2 "register_operand" "w") + (vec_duplicate:SME_ZA_FP8_x24 + (match_operand:<SME_ZA_FP8_x24:VSINGLE> 3 "register_operand" "x")) + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING_SME_F8F32" + { + operands[4] = GEN_INT (INTVAL (operands[1]) + 3); + return "<optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.b"; + } +) + +;; svmla_lane_za16[_mf8]_vg2x1_fpm, svmla_lane_za32[_mf8]_vg4x1_fpm, +;; svmla_lane_za16[_mf8]_vg2x2_fpm, svmla_lane_za32[_mf8]_vg4x2_fpm, +;; svmla_lane_za16[_mf8]_vg2x4_fpm, svmla_lane_za32[_mf8]_vg4x4_fpm +(define_insn "@aarch64_sme_lane_<optab><SME_ZA_F8F16_32:mode><SME_ZA_FP8_x124:mode>" + [(set (reg:SME_ZA_F8F16_32 ZA_REGNUM) + (unspec:SME_ZA_F8F16_32 + [(reg:SME_ZA_F8F16_32 ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SME_ZA_FP8_x124 1 "<SME_ZA_FP8_x124:aligned_operand>" "<SME_ZA_FP8_x124:aligned_fpr>") + (unspec:SME_ZA_FP8_x124 + [(match_operand:<SME_ZA_FP8_x124:VSINGLE> 2 "register_operand" "x") + (match_operand:SI 3 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING" + "<optab><SME_ZA_F8F16_32:za16_32_long>\tza.<SME_ZA_F8F16_32:Vetype>[%w0, 0:<SME_ZA_F8F16_32:za16_32_last_offset><SME_ZA_FP8_x124:vg_modifier>], %1<SME_ZA_FP8_x124:z_suffix>, %2.b[%3]" +) + +;; svmla_lane_za16[_mf8]_vg2x1_fpm, svmla_lane_za16[_mf8]_vg2x2_fpm, +;; svmla_lane_za16[_mf8]_vg2x4_fpm (slice variable + offset) +(define_insn "*aarch64_sme_lane_<optab><VNx8HI_ONLY:mode><SME_ZA_FP8_x124:mode>" + [(set (reg:VNx8HI_ONLY ZA_REGNUM) + (unspec:VNx8HI_ONLY + [(reg:VNx8HI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za16_offset_range>_operand")) + (match_operand:SME_ZA_FP8_x124 2 
"<SME_ZA_FP8_x124:aligned_operand>" "<SME_ZA_FP8_x124:aligned_fpr>") + (unspec:SME_ZA_FP8_x124 + [(match_operand:<SME_ZA_FP8_x124:VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING_SME_F8F16" + { + operands[5] = GEN_INT (INTVAL (operands[1]) + 1); + return "<optab>\tza.h[%w0, %1:%5<SME_ZA_FP8_x124:vg_modifier>], %2<SME_ZA_FP8_x124:z_suffix>, %3.b[%4]"; + } +) + +;; svmla_lane_za32[_mf8]_vg4x1_fpm, svmla_lane_za32[_mf8]_vg4x2_fpm, +;; svmla_lane_za32[_mf8]_vg4x4_fpm (slice variable + offset) +(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_FP8_x124:mode>" + [(set (reg:VNx4SI_ONLY ZA_REGNUM) + (unspec:VNx4SI_ONLY + [(reg:VNx4SI_ONLY ZA_REGNUM) + (reg:DI SME_STATE_REGNUM) + (plus:SI (match_operand:SI 0 "register_operand" "Uci") + (match_operand:SI 1 "const_<za32_offset_range>_operand")) + (match_operand:SME_ZA_FP8_x124 2 "<aligned_operand>" "<aligned_fpr>") + (unspec:SME_ZA_FP8_x124 + [(match_operand:<VSINGLE> 3 "register_operand" "x") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_SVE_LANE_SELECT) + (reg:DI FPM_REGNUM)] + SME_FP8_TERNARY_SLICE))] + "TARGET_STREAMING_SME_F8F32" + { + operands[5] = GEN_INT (INTVAL (operands[1]) + 3); + return "<optab>l\tza.s[%w0, %1:%5<SME_ZA_FP8_x124:vg_modifier>], %2<z_suffix>, %3.b[%4]"; + } +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Sum of outer products ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index b315dc91cc77..59f313d08f29 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -729,7 +729,7 @@ struct binary_za_slice_lane_base : public overloaded_base<1> resolve (function_resolver &r) const override { sve_type type; - if (!r.check_num_arguments (4) + if (!r.check_num_arguments (r.fpm_mode == FPM_set ? 5: 4) || !r.require_scalar_type (0, "uint32_t") || !(type = r.infer_tuple_type (1)) || !r.require_derived_vector_type (2, 1, type, TCLASS) @@ -758,7 +758,7 @@ struct binary_za_slice_opt_single_base : public overloaded_base<1> resolve (function_resolver &r) const override { sve_type type; - if (!r.check_num_arguments (3) + if (!r.check_num_arguments (r.fpm_mode == FPM_set ? 
4: 3) || !r.require_scalar_type (0, "uint32_t") || !(type = r.infer_tuple_type (1))) return error_mark_node; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc index 4657e29ad64b..43ef05c673ac 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc @@ -640,10 +640,11 @@ FUNCTION (svluti2_lane_zt, svluti_lane_zt_impl, (2)) FUNCTION (svluti4_lane_zt, svluti_lane_zt_impl, (4)) FUNCTION (svluti4_zt, svluti_zt_impl, (4)) FUNCTION (svmla_za, sme_2mode_function, (UNSPEC_SME_SMLA, UNSPEC_SME_UMLA, - UNSPEC_SME_FMLA)) + UNSPEC_SME_FMLA, UNSPEC_SME_FMLAL)) FUNCTION (svmla_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLA, UNSPEC_SME_UMLA, - UNSPEC_SME_FMLA)) + UNSPEC_SME_FMLA, + UNSPEC_SME_FMLAL)) FUNCTION (svmls_za, sme_2mode_function, (UNSPEC_SME_SMLS, UNSPEC_SME_UMLS, UNSPEC_SME_FMLS)) FUNCTION (svmls_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLS, diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index c86d5fa730bf..f9ad6837f44b 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -266,9 +266,17 @@ DEF_SME_FUNCTION_GS (svluti4_zt, luti4_zt, b_integer, x4, none) #undef REQUIRED_EXTENSIONS #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F8F16) +DEF_SME_ZA_FUNCTION_GS_FPM (svmla_lane, binary_za_slice_lane, za_h_mf8, + vg2, none, set) +DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_h_mf8, vg2, none, set) +DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_h_mf8, vg1x24, none, set) #undef REQUIRED_EXTENSIONS #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F8F32) +DEF_SME_ZA_FUNCTION_GS_FPM (svmla_lane, binary_za_slice_lane, za_s_mf8, + vg4, none, set) +DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_s_mf8, vg4, none, set) +DEF_SME_ZA_FUNCTION_GS_FPM (svmla, binary_za_slice_opt_single, za_s_mf8, vg1x24, none, set) #undef REQUIRED_EXTENSIONS #undef DEF_SME_ZA_FUNCTION diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c8a54b80e3b2..56937739518e 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -758,6 +758,13 @@ (define_mode_iterator SME_ZA_HFx124 [VNx8BF VNx16BF VNx32BF VNx8HF VNx16HF VNx32HF]) +(define_mode_iterator SME_ZA_F8F16_32 [(VNx8HI "TARGET_STREAMING_SME_F8F16") + (VNx4SI "TARGET_STREAMING_SME_F8F32")]) + +(define_mode_iterator SME_ZA_FP8_x24 [VNx32QI VNx64QI]) + +(define_mode_iterator SME_ZA_FP8_x124 [VNx16QI VNx32QI VNx64QI]) + (define_mode_iterator SME_ZA_HFx24 [VNx16BF VNx32BF VNx16HF VNx32HF]) (define_mode_iterator SME_ZA_HIx124 [VNx8HI VNx16HI VNx32HI]) @@ -1265,6 +1272,7 @@ UNSPEC_SME_FDOT UNSPEC_SME_FVDOT UNSPEC_SME_FMLA + UNSPEC_SME_FMLAL UNSPEC_SME_FMLS UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS @@ -2682,6 +2690,10 @@ (V4HF "<Vetype>[%4]") (V8HF "<Vetype>[%4]") ]) +(define_mode_attr za16_offset_range [(VNx16QI "0_to_14_step_2") + (VNx32QI "0_to_6_step_2") + (VNx64QI "0_to_6_step_2")]) + (define_mode_attr za32_offset_range [(VNx16QI "0_to_12_step_4") (VNx8BF "0_to_14_step_2") (VNx8HF "0_to_14_step_2") @@ -2702,6 +2714,10 @@ (define_mode_attr za32_long [(VNx16QI "ll") (VNx32QI "ll") (VNx64QI "ll") (VNx8HI "l") (VNx16HI "l") (VNx32HI "l")]) +(define_mode_attr za16_32_long [(VNx4SI "l")(VNx8HI "")]) + +(define_mode_attr za16_32_last_offset [(VNx4SI "3")(VNx8HI "1")]) + (define_mode_attr za32_last_offset [(VNx16QI 
"3") (VNx32QI "3") (VNx64QI "3") (VNx8HI "1") (VNx16HI "1") (VNx32HI "1")]) @@ -4049,6 +4065,8 @@ (define_int_iterator SME_FP_TERNARY_SLICE [UNSPEC_SME_FMLA UNSPEC_SME_FMLS]) +(define_int_iterator SME_FP8_TERNARY_SLICE [UNSPEC_SME_FMLAL]) + ;; Iterators for atomic operations. (define_int_iterator ATOMIC_LDOP @@ -4198,6 +4216,7 @@ (UNSPEC_SME_FDOT "fdot") (UNSPEC_SME_FVDOT "fvdot") (UNSPEC_SME_FMLA "fmla") + (UNSPEC_SME_FMLAL "fmlal") (UNSPEC_SME_FMLS "fmls") (UNSPEC_SME_FMOPA "fmopa") (UNSPEC_SME_FMOPS "fmops") diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x1.c new file mode 100644 index 000000000000..0d500c15e56f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x1.c @@ -0,0 +1,167 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme+sme-f8f16" + +/* +** mla_lane_0_z0_z0_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.h\[\1, 0:1\], z0\.b, z0\.b\[0\] +** ret +*/ + +TEST_ZA_X1 (mla_lane_0_z0_z0_0, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (0, z0, z0, 0, fpm0), + svmla_lane_za16_vg2x1_fpm (0, z0, z0, 0, fpm0)) + +/* +** mla_lane_w0_z0_z3_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlal za\.h\[\1, 0:1\], z0\.b, z3\.b\[1\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w0, z0, z3, 1, fpm0), + svmla_lane_za16_vg2x1_fpm (w0, z0, z3, 1, fpm0)) + +/* +** mla_lane_w7_z0_z3_2: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w7 +** fmlal za\.h\[\1, 0:1\], z0\.b, z3\.b\[2\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w7, z0, z3, 2, fpm0), + svmla_lane_za16_vg2x1_fpm (w7, z0, z3, 2, fpm0)) + +/* +** mla_lane_w8_z7_z3_3: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1\], z7\.b, z3\.b\[3\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8, z7, z3, 3, fpm0), + svmla_lane_za16_vg2x1_fpm (w8, z7, z3, 3, fpm0)) + +/* +** mla_lane_w8_z31_z16_4: +** msr fpmr, x1 +** mov (z[0-7])\.d, z16\.d +** fmlal za\.h\[w8, 0:1\], z31\.b. 
\1\.b\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8, z31, z16, 4, fpm0), + svmla_lane_za16_vg2x1_fpm (w8, z31, z16, 4, fpm0)) + +/* +** mla_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z0\.b, z0\.b\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8 + 1, z0, z0, 5, fpm0), + svmla_lane_za16_vg2x1_fpm (w8 + 1, z0, z0, 5, fpm0)) + +/* +** mla_lane_w8p2_z23_z0_6: +** msr fpmr, x1 +** fmlal za\.h\[w8, 2:3\], z23\.b, z0\.b\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8 + 2, z23, z0, 6, fpm0), + svmla_lane_za16_vg2x1_fpm (w8 + 2, z23, z0, 6, fpm0)) + +/* +** mla_lane_w11p6_z23_z0_7: +** msr fpmr, x1 +** fmlal za\.h\[w11, 6:7\], z23\.b, z0\.b\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p6_z23_z0_7, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w11 + 6, z23, z0, 7, fpm0), + svmla_lane_za16_vg2x1_fpm (w11 + 6, z23, z0, 7, fpm0)) + +/* +** mla_lane_w8p7_z7_z7_8: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z7\.b, z7\.b\[8\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p7_z7_z7_8, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8 + 7, z7, z7, 8, fpm0), + svmla_lane_za16_vg2x1_fpm (w8 + 7, z7, z7, 8, fpm0)) + +/* +** mla_lane_w11p12_z23_z0_7: +** msr fpmr, x1 +** fmlal za\.h\[w11, 12:13\], z23\.b, z0\.b\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p12_z23_z0_7, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w11 + 12, z23, z0, 7, fpm0), + svmla_lane_za16_vg2x1_fpm (w11 + 12, z23, z0, 7, fpm0)) + +/* +** mla_lane_w8p14_z23_z0_10: +** msr fpmr, x1 +** fmlal za\.h\[w8, 14:15\], z23\.b, z0\.b\[10\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p14_z23_z0_10, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8 + 14, z23, z0, 10, fpm0), + svmla_lane_za16_vg2x1_fpm (w8 + 14, z23, z0, 10, fpm0)) + +/* +** mla_lane_w8p15_z7_z7_11: +** add (w8|w9|w10|w11), w8, #?15 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z7\.b, z7\.b\[11\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p15_z7_z7_11, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8 + 15, z7, z7, 11, fpm0), + svmla_lane_za16_vg2x1_fpm (w8 + 15, z7, z7, 11, fpm0)) + +/* +** mla_lane_w8p16_z7_z7_12: +** add (w8|w9|w10|w11), w8, #?16 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z7\.b, z7\.b\[12\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p16_z7_z7_12, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8 + 16, z7, z7, 12, fpm0), + svmla_lane_za16_vg2x1_fpm (w8 + 16, z7, z7, 12, fpm0)) + +/* +** mla_lane_w8m1_z16_z0_13: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z16\.b, z0\.b\[13\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8m1_z16_z0_13, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w8 - 1, z16, z0, 13, fpm0), + svmla_lane_za16_vg2x1_fpm (w8 - 1, z16, z0, 13, fpm0)) + +/* +** mla_lane_w12_z0_z3_15: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w12 +** fmlal za\.h\[\1, 0:1\], z0\.b, z3\.b\[15\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_15, svmfloat8_t, + svmla_lane_za16_mf8_vg2x1_fpm (w12, z0, z3, 15, fpm0), + svmla_lane_za16_vg2x1_fpm (w12, z0, z3, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x2.c new file mode 100644 index 000000000000..bba907e6dbc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x2.c @@ -0,0 +1,136 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ 
+/* { dg-do compile { target { ! { aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme-f8f16" + +/* +** mla_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.h\[\1, 0:1, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (0, z0, z4, 0, fpm0), + svmla_lane_za16_vg2x2_fpm (0, z0, z4, 0, fpm0)) + +/* +** mla_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlal za\.h\[\1, 0:1, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w0, z0, z7, 1, fpm0), + svmla_lane_za16_vg2x2_fpm (w0, z0, z7, 1, fpm0)) + +/* +** mla_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w8, z28, z4, 2, fpm0), + svmla_lane_za16_vg2x2_fpm (w8, z28, z4, 2, fpm0)) + +/* +** mla_lane_w11p2_z0_z4_3: +** msr fpmr, x1 +** fmlal za\.h\[w11, 2:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p2_z0_z4_3, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w11 + 2, z0, z4, 3, fpm0), + svmla_lane_za16_vg2x2_fpm (w11 + 2, z0, z4, 3, fpm0)) + +/* +** mla_lane_w8p6_z0_z4_4: +** msr fpmr, x1 +** fmlal za\.h\[w8, 6:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w8 + 6, z0, z4, 4, fpm0), + svmla_lane_za16_vg2x2_fpm (w8 + 6, z0, z4, 4, fpm0)) + +/* +** mla_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w8 + 7, z0, z4, 5, fpm0), + svmla_lane_za16_vg2x2_fpm (w8 + 7, z0, z4, 5, fpm0)) + +/* +** mla_lane_w8p8_z0_z4_7: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_7, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w8 + 8, z0, z4, 7, fpm0), + svmla_lane_za16_vg2x2_fpm (w8 + 8, z0, z4, 7, fpm0)) + +/* +** mla_lane_w0m1_z0_z4_9: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_9, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w0 - 1, z0, z4, 9, fpm0), + svmla_lane_za16_vg2x2_fpm (w0 - 1, z0, z4, 9, fpm0)) + +/* +** mla_lane_w8_z4_z15_10: +** str d15, \[sp, #?-16\]! 
+** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_10, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w8, z4, z15, 10, fpm0), + svmla_lane_za16_vg2x2_fpm (w8, z4, z15, 10, fpm0)) + +/* +** mla_lane_w8_z28_z16_11: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fmlal za\.h\[w8, 0:1, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_11, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w8, z28, z16, 11, fpm0), + svmla_lane_za16_vg2x2_fpm (w8, z28, z16, 11, fpm0)) + +/* +** mla_lane_w8_z17_z7_13: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** fmlal za\.h\[w8, 0:1, vgx2\], [^\n]+, z7\.b\[13\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_13, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w8, z17, z7, 13, fpm0), + svmla_lane_za16_vg2x2_fpm (w8, z17, z7, 13, fpm0)) + +/* +** mla_lane_w8_z22_z4_15: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x2_fpm (w8, z22, z4, 15, fpm0), + svmla_lane_za16_vg2x2_fpm (w8, z22, z4, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x4.c new file mode 100644 index 000000000000..bdce691bc81e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_mf8_vg2x4.c @@ -0,0 +1,142 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme-f8f16" + +/* +** mla_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.h\[\1, 0:1, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (0, z0, z4, 0, fpm0), + svmla_lane_za16_vg2x4_fpm (0, z0, z4, 0, fpm0)) + +/* +** mla_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlal za\.h\[\1, 0:1, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w0, z0, z7, 1, fpm0), + svmla_lane_za16_vg2x4_fpm (w0, z0, z7, 1, fpm0)) + +/* +** mla_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w8, z28, z4, 2, fpm0), + svmla_lane_za16_vg2x4_fpm (w8, z28, z4, 2, fpm0)) + +/* +** mla_lane_w11p2_z0_z4_7: +** msr fpmr, x1 +** fmlal za\.h\[w11, 2:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p2_z0_z4_7, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w11 + 2, z0, z4, 7, fpm0), + svmla_lane_za16_vg2x4_fpm (w11 + 2, z0, z4, 7, fpm0)) + +/* +** mla_lane_w8p6_z0_z4_8: +** msr fpmr, x1 +** fmlal za\.h\[w8, 6:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_8, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w8 + 6, z0, z4, 8, fpm0), + svmla_lane_za16_vg2x4_fpm (w8 + 6, z0, z4, 8, fpm0)) + +/* +** mla_lane_w8p7_z0_z4_9: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\] +** ret 
+*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_9, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w8 + 7, z0, z4, 9, fpm0), + svmla_lane_za16_vg2x4_fpm (w8 + 7, z0, z4, 9, fpm0)) + +/* +** mla_lane_w8p8_z0_z4_10: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_10, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w8 + 8, z0, z4, 10, fpm0), + svmla_lane_za16_vg2x4_fpm (w8 + 8, z0, z4, 10, fpm0)) + +/* +** mla_lane_w0m1_z0_z4_11: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_11, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w0 - 1, z0, z4, 11, fpm0), + svmla_lane_za16_vg2x4_fpm (w0 - 1, z0, z4, 11, fpm0)) + +/* +** mla_lane_w8_z4_z15_12: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_12, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w8, z4, z15, 12, fpm0), + svmla_lane_za16_vg2x4_fpm (w8, z4, z15, 12, fpm0)) + +/* +** mla_lane_w8_z28_z16_13: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fmlal za\.h\[w8, 0:1, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_13, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w8, z28, z16, 13, fpm0), + svmla_lane_za16_vg2x4_fpm (w8, z28, z16, 13, fpm0)) + +/* +** mla_lane_w8_z17_z7_14: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmlal za\.h\[w8, 0:1, vgx4\], [^\n]+, z7\.b\[14\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_14, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w8, z17, z7, 14, fpm0), + svmla_lane_za16_vg2x4_fpm (w8, z17, z7, 14, fpm0)) + +/* +** mla_lane_w8_z22_z4_15: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmlal za\.h\[w8, 0:1, vgx4\], [^\n]+, z4\.b\[15\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za16_mf8_vg2x4_fpm (w8, z22, z4, 15, fpm0), + svmla_lane_za16_vg2x4_fpm (w8, z22, z4, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x1.c new file mode 100644 index 000000000000..3dc3ff72110f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x1.c @@ -0,0 +1,169 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme+sme-f8f32" + +/* +** mla_lane_0_z0_z0_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[0\] +** ret +*/ + +TEST_ZA_X1 (mla_lane_0_z0_z0_0, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (0, z0, z0, 0, fpm0), + svmla_lane_za32_vg4x1_fpm (0, z0, z0, 0, fpm0)) + +/* +** mla_lane_w0_z0_z3_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[1\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w0_z0_z3_1, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w0, z0, z3, 1, fpm0), + svmla_lane_za32_vg4x1_fpm (w0, z0, z3, 1, fpm0)) + +/* +** mla_lane_w7_z0_z3_2: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w7 +** fmlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[2\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w7_z0_z3_2, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w7, z0, z3, 2, fpm0), + svmla_lane_za32_vg4x1_fpm (w7, z0, z3, 2, fpm0)) + +/* +** mla_lane_w8_z7_z3_3: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3\], z7\.b, z3\.b\[3\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z7_z3_3, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8, z7, z3, 3, fpm0), + svmla_lane_za32_vg4x1_fpm (w8, z7, z3, 3, fpm0)) + +/* +** mla_lane_w8_z31_z16_4: +** msr fpmr, x1 +** mov (z[0-7])\.d, z16\.d +** fmlall za\.s\[w8, 0:3\], z31\.b. \1\.b\[4\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8_z31_z16_4, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8, z31, z16, 4, fpm0), + svmla_lane_za32_vg4x1_fpm (w8, z31, z16, 4, fpm0)) + +/* +** mla_lane_w8p1_z0_z0_5: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z0\.b, z0\.b\[5\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p1_z0_z0_5, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8 + 1, z0, z0, 5, fpm0), + svmla_lane_za32_vg4x1_fpm (w8 + 1, z0, z0, 5, fpm0)) + +/* +** mla_lane_w8p2_z23_z0_6: +** add (w8|w9|w10|w11), w8, #?2 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z23\.b, z0\.b\[6\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p2_z23_z0_6, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8 + 2, z23, z0, 6, fpm0), + svmla_lane_za32_vg4x1_fpm (w8 + 2, z23, z0, 6, fpm0)) + +/* +** mla_lane_w11p4_z23_z0_7: +** msr fpmr, x1 +** fmlall za\.s\[w11, 4:7\], z23\.b, z0\.b\[7\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p4_z23_z0_7, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w11 + 4, z23, z0, 7, fpm0), + svmla_lane_za32_vg4x1_fpm (w11 + 4, z23, z0, 7, fpm0)) + +/* +** mla_lane_w8p7_z7_z7_8: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[8\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p7_z7_z7_8, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8 + 7, z7, z7, 8, fpm0), + svmla_lane_za32_vg4x1_fpm (w8 + 7, z7, z7, 8, fpm0)) + +/* +** mla_lane_w11p12_z23_z0_9: +** msr fpmr, x1 +** fmlall za\.s\[w11, 12:15\], z23\.b, z0\.b\[9\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w11p12_z23_z0_9, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w11 + 12, z23, z0, 9, fpm0), + svmla_lane_za32_vg4x1_fpm (w11 + 12, z23, z0, 9, fpm0)) + +/* +** mla_lane_w8p14_z23_z0_10: +** add (w8|w9|w10|w11), w8, #?14 +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3\], z23\.b, z0\.b\[10\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p14_z23_z0_10, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8 + 14, z23, z0, 10, fpm0), + svmla_lane_za32_vg4x1_fpm (w8 + 14, z23, z0, 10, fpm0)) + +/* +** mla_lane_w8p15_z7_z7_11: +** add (w8|w9|w10|w11), w8, #?15 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[11\] +** ret +*/ 
+TEST_ZA_X1 (mla_lane_w8p15_z7_z7_11, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8 + 15, z7, z7, 11, fpm0), + svmla_lane_za32_vg4x1_fpm (w8 + 15, z7, z7, 11, fpm0)) + +/* +** mla_lane_w8p16_z7_z7_12: +** add (w8|w9|w10|w11), w8, #?16 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z7\.b, z7\.b\[12\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8p16_z7_z7_12, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8 + 16, z7, z7, 12, fpm0), + svmla_lane_za32_vg4x1_fpm (w8 + 16, z7, z7, 12, fpm0)) + +/* +** mla_lane_w8m1_z16_z0_13: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z16\.b, z0\.b\[13\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w8m1_z16_z0_13, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w8 - 1, z16, z0, 13, fpm0), + svmla_lane_za32_vg4x1_fpm (w8 - 1, z16, z0, 13, fpm0)) + +/* +** mla_lane_w12_z0_z3_15: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w12 +** fmlall za\.s\[\1, 0:3\], z0\.b, z3\.b\[15\] +** ret +*/ +TEST_ZA_X1 (mla_lane_w12_z0_z3_15, svmfloat8_t, + svmla_lane_za32_mf8_vg4x1_fpm (w12, z0, z3, 15, fpm0), + svmla_lane_za32_vg4x1_fpm (w12, z0, z3, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x2.c new file mode 100644 index 000000000000..7717aabfd2ae --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x2.c @@ -0,0 +1,137 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme-f8f32" + +/* +** mla_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (0, z0, z4, 0, fpm0), + svmla_lane_za32_vg4x2_fpm (0, z0, z4, 0, fpm0)) + +/* +** mla_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w0, z0, z7, 1, fpm0), + svmla_lane_za32_vg4x2_fpm (w0, z0, z7, 1, fpm0)) + +/* +** mla_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w8, z28, z4, 2, fpm0), + svmla_lane_za32_vg4x2_fpm (w8, z28, z4, 2, fpm0)) + +/* +** mla_lane_w11p4_z0_z4_3: +** msr fpmr, x1 +** fmlall za\.s\[w11, 4:7, vgx2\], {z0\.b - z1\.b}, z4\.b\[3\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_3, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w11 + 4, z0, z4, 3, fpm0), + svmla_lane_za32_vg4x2_fpm (w11 + 4, z0, z4, 3, fpm0)) + +/* +** mla_lane_w8p6_z0_z4_4: +** add (w8|w9|w10|w11), w8, #?6 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[4\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_4, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w8 + 6, z0, z4, 4, fpm0), + svmla_lane_za32_vg4x2_fpm (w8 + 6, z0, z4, 4, fpm0)) + +/* +** mla_lane_w8p7_z0_z4_5: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[5\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_5, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w8 + 7, z0, z4, 5, 
fpm0), + svmla_lane_za32_vg4x2_fpm (w8 + 7, z0, z4, 5, fpm0)) + +/* +** mla_lane_w8p8_z0_z4_7: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_7, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w8 + 8, z0, z4, 7, fpm0), + svmla_lane_za32_vg4x2_fpm (w8 + 8, z0, z4, 7, fpm0)) + +/* +** mla_lane_w0m1_z0_z4_9: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, z4\.b\[9\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_9, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w0 - 1, z0, z4, 9, fpm0), + svmla_lane_za32_vg4x2_fpm (w0 - 1, z0, z4, 9, fpm0)) + +/* +** mla_lane_w8_z4_z15_10: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, z15\.b\[10\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_10, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w8, z4, z15, 10, fpm0), + svmla_lane_za32_vg4x2_fpm (w8, z4, z15, 10, fpm0)) + +/* +** mla_lane_w8_z28_z16_11: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fmlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, \1\.b\[11\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_11, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w8, z28, z16, 11, fpm0), + svmla_lane_za32_vg4x2_fpm (w8, z28, z16, 11, fpm0)) + +/* +** mla_lane_w8_z17_z7_13: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** fmlall za\.s\[w8, 0:3, vgx2\], [^\n]+, z7\.b\[13\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_13, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w8, z17, z7, 13, fpm0), + svmla_lane_za32_vg4x2_fpm (w8, z17, z7, 13, fpm0)) + +/* +** mla_lane_w8_z22_z4_15: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z22\.b - z23\.b}, z4\.b\[15\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svmfloat8x2_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x2_fpm (w8, z22, z4, 15, fpm0), + svmla_lane_za32_vg4x2_fpm (w8, z22, z4, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x4.c new file mode 100644 index 000000000000..159b1048c847 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za32_mf8_vg4x4.c @@ -0,0 +1,143 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme-f8f32" + +/* +** mla_lane_0_z0_z4_0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[0\] +** ret +*/ +TEST_ZA_LANE (mla_lane_0_z0_z4_0, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (0, z0, z4, 0, fpm0), + svmla_lane_za32_vg4x4_fpm (0, z0, z4, 0, fpm0)) + +/* +** mla_lane_w0_z0_z7_1: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z7\.b\[1\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w0, z0, z7, 1, fpm0), + svmla_lane_za32_vg4x4_fpm (w0, z0, z7, 1, fpm0)) + +/* +** mla_lane_w8_z28_z4_2: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, z4\.b\[2\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w8, z28, z4, 2, fpm0), + svmla_lane_za32_vg4x4_fpm (w8, z28, z4, 2, fpm0)) + +/* +** mla_lane_w11p4_z0_z4_7: +** msr fpmr, x1 +** fmlall za\.s\[w11, 4:7, vgx4\], {z0\.b - z3\.b}, z4\.b\[7\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w11p4_z0_z4_7, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w11 + 4, z0, z4, 7, fpm0), + svmla_lane_za32_vg4x4_fpm (w11 + 4, z0, z4, 7, fpm0)) + +/* +** mla_lane_w8p6_z0_z4_8: +** add (w8|w9|w10|w11), w8, #?6 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[8\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p6_z0_z4_8, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w8 + 6, z0, z4, 8, fpm0), + svmla_lane_za32_vg4x4_fpm (w8 + 6, z0, z4, 8, fpm0)) + +/* +** mla_lane_w8p7_z0_z4_9: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[9\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p7_z0_z4_9, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w8 + 7, z0, z4, 9, fpm0), + svmla_lane_za32_vg4x4_fpm (w8 + 7, z0, z4, 9, fpm0)) + +/* +** mla_lane_w8p8_z0_z4_10: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[10\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8p8_z0_z4_10, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w8 + 8, z0, z4, 10, fpm0), + svmla_lane_za32_vg4x4_fpm (w8 + 8, z0, z4, 10, fpm0)) + +/* +** mla_lane_w0m1_z0_z4_11: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, z4\.b\[11\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w0m1_z0_z4_11, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w0 - 1, z0, z4, 11, fpm0), + svmla_lane_za32_vg4x4_fpm (w0 - 1, z0, z4, 11, fpm0)) + +/* +** mla_lane_w8_z4_z15_12: +** str d15, \[sp, #?-16\]! 
+** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, z15\.b\[12\] +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_12, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w8, z4, z15, 12, fpm0), + svmla_lane_za32_vg4x4_fpm (w8, z4, z15, 12, fpm0)) + +/* +** mla_lane_w8_z28_z16_13: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fmlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, \1\.b\[13\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z28_z16_13, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w8, z28, z16, 13, fpm0), + svmla_lane_za32_vg4x4_fpm (w8, z28, z16, 13, fpm0)) + +/* +** mla_lane_w8_z17_z7_14: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z7\.b\[14\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z17_z7_14, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w8, z17, z7, 14, fpm0), + svmla_lane_za32_vg4x4_fpm (w8, z17, z7, 14, fpm0)) + +/* +** mla_lane_w8_z22_z4_15: +** msr fpmr, x1 +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** mov [^\n]+ +** fmlall za\.s\[w8, 0:3, vgx4\], [^\n]+, z4\.b\[15\] +** ret +*/ +TEST_ZA_LANE (mla_lane_w8_z22_z4_15, svmfloat8x4_t, svmfloat8_t, + svmla_lane_za32_mf8_vg4x4_fpm (w8, z22, z4, 15, fpm0), + svmla_lane_za32_vg4x4_fpm (w8, z22, z4, 15, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x1.c new file mode 100644 index 000000000000..1c67705ab6cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x1.c @@ -0,0 +1,167 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme+sme-f8f16" + +/* +** mla_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.h\[\1, 0:1\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_0_z0_z0, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (0, z0, z0, fpm0), + svmla_za16_vg2x1_fpm (0, z0, z0, fpm0)) + +/* +** mla_w0_z0_z3: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlal za\.h\[\1, 0:1\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w0_z0_z3, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w0, z0, z3, fpm0), + svmla_za16_vg2x1_fpm (w0, z0, z3, fpm0)) + +/* +** mla_w7_z0_z3: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w7 +** fmlal za\.h\[\1, 0:1\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w7_z0_z3, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w7, z0, z3, fpm0), + svmla_za16_vg2x1_fpm (w7, z0, z3, fpm0)) + +/* +** mla_w8_z7_z3: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1\], z7\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w8_z7_z3, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w8, z7, z3, fpm0), + svmla_za16_vg2x1_fpm (w8, z7, z3, fpm0)) + +/* +** mla_w8_z31_z16: +** msr fpmr, x1 +** mov (z[0-7])\.d, z16\.d +** fmlal za\.h\[w8, 0:1\], z31\.b. 
\1\.b +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w8, z31, z16, fpm0), + svmla_za16_vg2x1_fpm (w8, z31, z16, fpm0)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w8 + 1, z0, z0, fpm0), + svmla_za16_vg2x1_fpm (w8 + 1, z0, z0, fpm0)) + +/* +** mla_w10p4_z23_z0: +** msr fpmr, x1 +** fmlal za\.h\[w10, 4:5\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w10p4_z23_z0, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w10 + 4, z23, z0, fpm0), + svmla_za16_vg2x1_fpm (w10 + 4, z23, z0, fpm0)) + +/* +** mla_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w11 + 6, z23, z0, fpm0), + svmla_za16_vg2x1_fpm (w11 + 6, z23, z0, fpm0)) + +/* +** mla_w9p8_z7_z7: +** msr fpmr, x1 +** fmlal za\.h\[w9, 8:9\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w9p8_z7_z7, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w9 + 8, z7, z7, fpm0), + svmla_za16_vg2x1_fpm (w9 + 8, z7, z7, fpm0)) + +/* +** mla_w11p12_z23_z0: +** msr fpmr, x1 +** fmlal za\.h\[w11, 12:13\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w11p12_z23_z0, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w11 + 12, z23, z0, fpm0), + svmla_za16_vg2x1_fpm (w11 + 12, z23, z0, fpm0)) + +/* +** mla_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w8 + 14, z23, z0, fpm0), + svmla_za16_vg2x1_fpm (w8 + 14, z23, z0, fpm0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w8 + 15, z7, z7, fpm0), + svmla_za16_vg2x1_fpm (w8 + 15, z7, z7, fpm0)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w8 + 16, z7, z7, fpm0), + svmla_za16_vg2x1_fpm (w8 + 16, z7, z7, fpm0)) + +/* +** mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1\], z16\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w8 - 1, z16, z0, fpm0), + svmla_za16_vg2x1_fpm (w8 - 1, z16, z0, fpm0)) + +/* +** mla_w12_z0_z3: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w12 +** fmlal za\.h\[\1, 0:1\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svmfloat8_t, + svmla_za16_mf8_vg2x1_fpm (w12, z0, z3, fpm0), + svmla_za16_vg2x1_fpm (w12, z0, z3, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x2.c new file mode 100644 index 000000000000..8dc613bb3c77 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x2.c @@ -0,0 +1,285 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme+sme-f8f16" + +/* +** mla_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.h\[\1, 0:1, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (0, z0, z0, fpm0), + svmla_za16_vg2x2_fpm (0, z0, z0, fpm0)) + +/* +** mla_w0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlal za\.h\[\1, 0:1, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w0, z0, z0, fpm0), + svmla_za16_vg2x2_fpm (w0, z0, z0, fpm0)) + +/* +** mla_w8_z0_z4: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8, z0, z4, fpm0), + svmla_za16_vg2x2_fpm (w8, z0, z4, fpm0)) + +/* +** mla_w8_z4_z18: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8, z4, z18, fpm0), + svmla_za16_vg2x2_fpm (w8, z4, z18, fpm0)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** msr fpmr, x1 +** ... +** fmlal za\.h\[w8, 0:1, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8, z0, z23, fpm0), + svmla_za16_vg2x2_fpm (w8, z0, z23, fpm0)) + +/* +** mla_w8_z23_z0: +** msr fpmr, x1 +** ... +** fmlal za\.h\[w8, 0:1, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8, z23, z0, fpm0), + svmla_za16_vg2x2_fpm (w8, z23, z0, fpm0)) + +/* +** mla_w8_z18_z28: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8, z18, z28, fpm0), + svmla_za16_vg2x2_fpm (w8, z18, z28, fpm0)) + +/* +** mla_w8_z28_z4: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8, z28, z4, fpm0), + svmla_za16_vg2x2_fpm (w8, z28, z4, fpm0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8 + 1, z4, z0, fpm0), + svmla_za16_vg2x2_fpm (w8 + 1, z4, z0, fpm0)) + +/* +** mla_w8p2_z4_z0: +** msr fpmr, x1 +** fmlal za\.h\[w8, 2:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8 + 2, z4, z0, fpm0), + svmla_za16_vg2x2_fpm (w8 + 2, z4, z0, fpm0)) + +/* +** mla_w11p4_z4_z0: +** msr fpmr, x1 +** fmlal za\.h\[w11, 4:5, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w11 + 4, z4, z0, fpm0), + svmla_za16_vg2x2_fpm (w11 + 4, z4, z0, fpm0)) + +/* +** mla_w11p6_z4_z0: +** msr fpmr, x1 +** fmlal za\.h\[w11, 6:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p6_z4_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w11 + 6, z4, z0, fpm0), + svmla_za16_vg2x2_fpm (w11 + 6, z4, z0, fpm0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, 
vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8 + 7, z4, z0, fpm0), + svmla_za16_vg2x2_fpm (w8 + 7, z4, z0, fpm0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8 + 8, z4, z4, fpm0), + svmla_za16_vg2x2_fpm (w8 + 8, z4, z4, fpm0)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svmfloat8x2_t, + svmla_za16_mf8_vg2x2_fpm (w8 - 1, z4, z0, fpm0), + svmla_za16_vg2x2_fpm (w8 - 1, z4, z0, fpm0)) + +/* +** mla_single_0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.h\[\1, 0:1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (0, z1, z0, fpm0), + svmla_za16_vg2x2_fpm (0, z1, z0, fpm0)) + +/* +** mla_single_w0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlal za\.h\[\1, 0:1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w0, z1, z0, fpm0), + svmla_za16_vg2x2_fpm (w0, z1, z0, fpm0)) + +/* +** mla_single_w8_z1_z0: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w8, z1, z0, fpm0), + svmla_za16_vg2x2_fpm (w8, z1, z0, fpm0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w8 + 1, z1, z0, fpm0), + svmla_za16_vg2x2_fpm (w8 + 1, z1, z0, fpm0)) + +/* +** mla_single_w8p2_z20_z0: +** msr fpmr, x1 +** fmlal za\.h\[w8, 2:3, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w8 + 2, z20, z0, fpm0), + svmla_za16_vg2x2_fpm (w8 + 2, z20, z0, fpm0)) + +/* +** mla_single_w11p6_z27_z0: +** msr fpmr, x1 +** fmlal za\.h\[w11, 6:7, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w11p6_z27_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w11 + 6, z27, z0, fpm0), + svmla_za16_vg2x2_fpm (w11 + 6, z27, z0, fpm0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w8 + 7, z1, z0, fpm0), + svmla_za16_vg2x2_fpm (w8 + 7, z1, z0, fpm0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w8 + 8, z1, z0, fpm0), + svmla_za16_vg2x2_fpm (w8 + 8, z1, z0, fpm0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w0 - 1, z1, z0, fpm0), + 
svmla_za16_vg2x2_fpm (w0 - 1, z1, z0, fpm0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w8, z0, z15, fpm0), + svmla_za16_vg2x2_fpm (w8, z0, z15, fpm0)) + +/* +** mla_single_w8_z20_z16: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fmlal za\.h\[w8, 0:1, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svmfloat8x2_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x2_fpm (w8, z20, z16, fpm0), + svmla_za16_vg2x2_fpm (w8, z20, z16, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x4.c new file mode 100644 index 000000000000..204231314c40 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_mf8_vg2x4.c @@ -0,0 +1,287 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f16_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f16_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme+sme-f8f16" + +/* +** mla_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.h\[\1, 0:1, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (0, z0, z0, fpm0), + svmla_za16_vg2x4_fpm (0, z0, z0, fpm0)) + +/* +** mla_w0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlal za\.h\[\1, 0:1, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w0, z0, z0, fpm0), + svmla_za16_vg2x4_fpm (w0, z0, z0, fpm0)) + +/* +** mla_w8_z0_z4: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8, z0, z4, fpm0), + svmla_za16_vg2x4_fpm (w8, z0, z4, fpm0)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** msr fpmr, x1 +** ... +** fmlal za\.h\[w8, 0:1, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8, z0, z18, fpm0), + svmla_za16_vg2x4_fpm (w8, z0, z18, fpm0)) + +/* +** mla_w8_z18_z0: +** msr fpmr, x1 +** ... +** fmlal za\.h\[w8, 0:1, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8, z18, z0, fpm0), + svmla_za16_vg2x4_fpm (w8, z18, z0, fpm0)) + +/* +** mla_w8_z0_z23: +** msr fpmr, x1 +** ... +** fmlal za\.h\[w8, 0:1, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8, z0, z23, fpm0), + svmla_za16_vg2x4_fpm (w8, z0, z23, fpm0)) + +/* +** mla_w8_z23_z0: +** msr fpmr, x1 +** ... 
+** fmlal za\.h\[w8, 0:1, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8, z23, z0, fpm0), + svmla_za16_vg2x4_fpm (w8, z23, z0, fpm0)) + +/* +** mla_w8_z4_z28: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8, z4, z28, fpm0), + svmla_za16_vg2x4_fpm (w8, z4, z28, fpm0)) + +/* +** mla_w8_z28_z0: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8, z28, z0, fpm0), + svmla_za16_vg2x4_fpm (w8, z28, z0, fpm0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8 + 1, z4, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 + 1, z4, z0, fpm0)) + +/* +** mla_w8p2_z4_z0: +** msr fpmr, x1 +** fmlal za\.h\[w8, 2:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8 + 2, z4, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 + 2, z4, z0, fpm0)) + +/* +** mla_w11p6_z4_z0: +** msr fpmr, x1 +** fmlal za\.h\[w11, 6:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p6_z4_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w11 + 6, z4, z0, fpm0), + svmla_za16_vg2x4_fpm (w11 + 6, z4, z0, fpm0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8 + 7, z4, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 + 7, z4, z0, fpm0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8 + 8, z4, z4, fpm0), + svmla_za16_vg2x4_fpm (w8 + 8, z4, z4, fpm0)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svmfloat8x4_t, + svmla_za16_mf8_vg2x4_fpm (w8 - 1, z4, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 - 1, z4, z0, fpm0)) + +/* +** mla_single_0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlal za\.h\[\1, 0:1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (0, z1, z0, fpm0), + svmla_za16_vg2x4_fpm (0, z1, z0, fpm0)) + +/* +** mla_single_w0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlal za\.h\[\1, 0:1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w0, z1, z0, fpm0), + svmla_za16_vg2x4_fpm (w0, z1, z0, fpm0)) + +/* +** mla_single_w8_z1_z0: +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w8, z1, z0, fpm0), + svmla_za16_vg2x4_fpm (w8, z1, z0, fpm0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p1_z1_z0, svmfloat8x4_t, svmfloat8_t, 
+ svmla_single_za16_mf8_vg2x4_fpm (w8 + 1, z1, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 + 1, z1, z0, fpm0)) + +/* +** mla_single_w8p2_z20_z0: +** msr fpmr, x1 +** fmlal za\.h\[w8, 2:3, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w8 + 2, z20, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 + 2, z20, z0, fpm0)) + +/* +** mla_single_w8p6_z27_z0: +** msr fpmr, x1 +** fmlal za\.h\[w8, 6:7, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w8 + 6, z27, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 + 6, z27, z0, fpm0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w8 + 7, z1, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 + 7, z1, z0, fpm0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w8 + 8, z1, z0, fpm0), + svmla_za16_vg2x4_fpm (w8 + 8, z1, z0, fpm0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fmlal za\.h\[\1, 0:1, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w0 - 1, z1, z0, fpm0), + svmla_za16_vg2x4_fpm (w0 - 1, z1, z0, fpm0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fmlal za\.h\[w8, 0:1, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w8, z0, z15, fpm0), + svmla_za16_vg2x4_fpm (w8, z0, z15, fpm0)) + +/* +** mla_single_w8_z20_z16: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fmlal za\.h\[w8, 0:1, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svmfloat8x4_t, svmfloat8_t, + svmla_single_za16_mf8_vg2x4_fpm (w8, z20, z16, fpm0), + svmla_za16_vg2x4_fpm (w8, z20, z16, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x1.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x1.c new file mode 100644 index 000000000000..cb1832b18d9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x1.c @@ -0,0 +1,167 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme+sme-f8f32" + +/* +** mla_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlall za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_0_z0_z0, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (0, z0, z0, fpm0), + svmla_za32_vg4x1_fpm (0, z0, z0, fpm0)) + +/* +** mla_w0_z0_z3: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w0_z0_z3, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w0, z0, z3, fpm0), + svmla_za32_vg4x1_fpm (w0, z0, z3, fpm0)) + +/* +** mla_w7_z0_z3: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w7 +** fmlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w7_z0_z3, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w7, z0, z3, fpm0), + svmla_za32_vg4x1_fpm (w7, z0, z3, fpm0)) + +/* +** mla_w8_z7_z3: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3\], z7\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w8_z7_z3, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w8, z7, z3, fpm0), + svmla_za32_vg4x1_fpm (w8, z7, z3, fpm0)) + +/* +** mla_w8_z31_z16: +** msr fpmr, x1 +** mov (z[0-7])\.d, z16\.d +** fmlall za\.s\[w8, 0:3\], z31\.b. \1\.b +** ret +*/ +TEST_ZA_X1 (mla_w8_z31_z16, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w8, z31, z16, fpm0), + svmla_za32_vg4x1_fpm (w8, z31, z16, fpm0)) + +/* +** mla_w8p1_z0_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z0\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p1_z0_z0, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w8 + 1, z0, z0, fpm0), + svmla_za32_vg4x1_fpm (w8 + 1, z0, z0, fpm0)) + +/* +** mla_w10p4_z23_z0: +** msr fpmr, x1 +** fmlall za\.s\[w10, 4:7\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w10p4_z23_z0, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w10 + 4, z23, z0, fpm0), + svmla_za32_vg4x1_fpm (w10 + 4, z23, z0, fpm0)) + +/* +** mla_w11p6_z23_z0: +** add (w8|w9|w10|w11), w11, #?6 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w11p6_z23_z0, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w11 + 6, z23, z0, fpm0), + svmla_za32_vg4x1_fpm (w11 + 6, z23, z0, fpm0)) + +/* +** mla_w9p8_z7_z7: +** msr fpmr, x1 +** fmlall za\.s\[w9, 8:11\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w9p8_z7_z7, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w9 + 8, z7, z7, fpm0), + svmla_za32_vg4x1_fpm (w9 + 8, z7, z7, fpm0)) + +/* +** mla_w11p12_z23_z0: +** msr fpmr, x1 +** fmlall za\.s\[w11, 12:15\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w11p12_z23_z0, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w11 + 12, z23, z0, fpm0), + svmla_za32_vg4x1_fpm (w11 + 12, z23, z0, fpm0)) + +/* +** mla_w8p14_z23_z0: +** add (w8|w9|w10|w11), w8, #?14 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z23\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p14_z23_z0, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w8 + 14, z23, z0, fpm0), + svmla_za32_vg4x1_fpm (w8 + 14, z23, z0, fpm0)) + +/* +** mla_w8p15_z7_z7: +** add (w8|w9|w10|w11), w8, #?15 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p15_z7_z7, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w8 + 15, z7, z7, fpm0), + svmla_za32_vg4x1_fpm (w8 + 15, z7, z7, fpm0)) + +/* +** mla_w8p16_z7_z7: +** add (w8|w9|w10|w11), w8, #?16 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z7\.b, z7\.b +** ret +*/ +TEST_ZA_X1 (mla_w8p16_z7_z7, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w8 + 16, z7, z7, fpm0), + svmla_za32_vg4x1_fpm (w8 + 16, z7, z7, fpm0)) + +/* +** 
mla_w8m1_z16_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3\], z16\.b, z0\.b +** ret +*/ +TEST_ZA_X1 (mla_w8m1_z16_z0, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w8 - 1, z16, z0, fpm0), + svmla_za32_vg4x1_fpm (w8 - 1, z16, z0, fpm0)) + +/* +** mla_w12_z0_z3: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w12 +** fmlall za\.s\[\1, 0:3\], z0\.b, z3\.b +** ret +*/ +TEST_ZA_X1 (mla_w12_z0_z3, svmfloat8_t, + svmla_za32_mf8_vg4x1_fpm (w12, z0, z3, fpm0), + svmla_za32_vg4x1_fpm (w12, z0, z3, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x2.c new file mode 100644 index 000000000000..246a492ad3ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x2.c @@ -0,0 +1,277 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme+sme-f8f32" + +/* +** mla_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (0, z0, z0, fpm0), + svmla_za32_vg4x2_fpm (0, z0, z0, fpm0)) + +/* +** mla_w0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlall za\.s\[\1, 0:3, vgx2\], {z0\.b - z1\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w0, z0, z0, fpm0), + svmla_za32_vg4x2_fpm (w0, z0, z0, fpm0)) + +/* +** mla_w8_z0_z4: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8, z0, z4, fpm0), + svmla_za32_vg4x2_fpm (w8, z0, z4, fpm0)) + +/* +** mla_w8_z4_z18: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z4\.b - z5\.b}, {z18\.b - z19\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z18, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8, z4, z18, fpm0), + svmla_za32_vg4x2_fpm (w8, z4, z18, fpm0)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z23: +** msr fpmr, x1 +** ... +** fmlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8, z0, z23, fpm0), + svmla_za32_vg4x2_fpm (w8, z0, z23, fpm0)) + +/* +** mla_w8_z23_z0: +** msr fpmr, x1 +** ... 
+** fmlall za\.s\[w8, 0:3, vgx2\], [^\n]+, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8, z23, z0, fpm0), + svmla_za32_vg4x2_fpm (w8, z23, z0, fpm0)) + +/* +** mla_w8_z18_z28: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z18\.b - z19\.b}, {z28\.b - z29\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z28, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8, z18, z28, fpm0), + svmla_za32_vg4x2_fpm (w8, z18, z28, fpm0)) + +/* +** mla_w8_z28_z4: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z28\.b - z29\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z4, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8, z28, z4, fpm0), + svmla_za32_vg4x2_fpm (w8, z28, z4, fpm0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8 + 1, z4, z0, fpm0), + svmla_za32_vg4x2_fpm (w8 + 1, z4, z0, fpm0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8 + 2, z4, z0, fpm0), + svmla_za32_vg4x2_fpm (w8 + 2, z4, z0, fpm0)) + +/* +** mla_w11p4_z4_z0: +** msr fpmr, x1 +** fmlall za\.s\[w11, 4:7, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w11 + 4, z4, z0, fpm0), + svmla_za32_vg4x2_fpm (w11 + 4, z4, z0, fpm0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8 + 7, z4, z0, fpm0), + svmla_za32_vg4x2_fpm (w8 + 7, z4, z0, fpm0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z4\.b - z5\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8 + 8, z4, z4, fpm0), + svmla_za32_vg4x2_fpm (w8 + 8, z4, z4, fpm0)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z4\.b - z5\.b}, {z0\.b - z1\.b} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svmfloat8x2_t, + svmla_za32_mf8_vg4x2_fpm (w8 - 1, z4, z0, fpm0), + svmla_za32_vg4x2_fpm (w8 - 1, z4, z0, fpm0)) + +/* +** mla_single_0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (0, z1, z0, fpm0), + svmla_za32_vg4x2_fpm (0, z1, z0, fpm0)) + +/* +** mla_single_w0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w0, z1, z0, fpm0), + svmla_za32_vg4x2_fpm (w0, z1, z0, fpm0)) + +/* +** mla_single_w8_z1_z0: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w8, z1, z0, fpm0), + svmla_za32_vg4x2_fpm (w8, z1, z0, fpm0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE 
(mla_single_w8p1_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w8 + 1, z1, z0, fpm0), + svmla_za32_vg4x2_fpm (w8 + 1, z1, z0, fpm0)) + +/* +** mla_single_w8p2_z20_z0: +** add (w8|w9|w10|w11), w8, #?2 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z20\.b - z21\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p2_z20_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w8 + 2, z20, z0, fpm0), + svmla_za32_vg4x2_fpm (w8 + 2, z20, z0, fpm0)) + +/* +** mla_single_w11p4_z27_z0: +** msr fpmr, x1 +** fmlall za\.s\[w11, 4:7, vgx2\], {z27\.b - z28\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w11p4_z27_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w11 + 4, z27, z0, fpm0), + svmla_za32_vg4x2_fpm (w11 + 4, z27, z0, fpm0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w8 + 7, z1, z0, fpm0), + svmla_za32_vg4x2_fpm (w8 + 7, z1, z0, fpm0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w8 + 8, z1, z0, fpm0), + svmla_za32_vg4x2_fpm (w8 + 8, z1, z0, fpm0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx2\], {z1\.b - z2\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w0 - 1, z1, z0, fpm0), + svmla_za32_vg4x2_fpm (w0 - 1, z1, z0, fpm0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx2\], {z0\.b - z1\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w8, z0, z15, fpm0), + svmla_za32_vg4x2_fpm (w8, z0, z15, fpm0)) + +/* +** mla_single_w8_z20_z16: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fmlall za\.s\[w8, 0:3, vgx2\], {z20\.b - z21\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svmfloat8x2_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x2_fpm (w8, z20, z16, fpm0), + svmla_za32_vg4x2_fpm (w8, z20, z16, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x4.c new file mode 100644 index 000000000000..1b10dc81711b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za32_mf8_vg4x4.c @@ -0,0 +1,289 @@ +/* { dg-do assemble { target { aarch64_asm_sme-f8f32_ok } } } */ +/* { dg-do compile { target { ! 
{ aarch64_asm_sme-f8f32_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" +#pragma GCC target "+sme+sme-f8f32" + +/* +** mla_0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_0_z0_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (0, z0, z0, fpm0), + svmla_za32_vg4x4_fpm (0, z0, z0, fpm0)) + +/* +** mla_w0_z0_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlall za\.s\[\1, 0:3, vgx4\], {z0\.b - z3\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w0_z0_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w0, z0, z0, fpm0), + svmla_za32_vg4x4_fpm (w0, z0, z0, fpm0)) + +/* +** mla_w8_z0_z4: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z4, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8, z0, z4, fpm0), + svmla_za32_vg4x4_fpm (w8, z0, z4, fpm0)) + +/* Leave the assembler to check for correctness for misaligned registers. */ + +/* +** mla_w8_z0_z18: +** msr fpmr, x1 +** ... +** fmlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z18, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8, z0, z18, fpm0), + svmla_za32_vg4x4_fpm (w8, z0, z18, fpm0)) + +/* +** mla_w8_z18_z0: +** msr fpmr, x1 +** ... +** fmlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z18_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8, z18, z0, fpm0), + svmla_za32_vg4x4_fpm (w8, z18, z0, fpm0)) + +/* +** mla_w8_z0_z23: +** msr fpmr, x1 +** ... +** fmlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, [^\n]+ +** ret +*/ +TEST_ZA_XN (mla_w8_z0_z23, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8, z0, z23, fpm0), + svmla_za32_vg4x4_fpm (w8, z0, z23, fpm0)) + +/* +** mla_w8_z23_z0: +** msr fpmr, x1 +** ... 
+** fmlall za\.s\[w8, 0:3, vgx4\], [^\n]+, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z23_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8, z23, z0, fpm0), + svmla_za32_vg4x4_fpm (w8, z23, z0, fpm0)) + +/* +** mla_w8_z4_z28: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx4\], {z4\.b - z7\.b}, {z28\.b - z31\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z4_z28, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8, z4, z28, fpm0), + svmla_za32_vg4x4_fpm (w8, z4, z28, fpm0)) + +/* +** mla_w8_z28_z0: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx4\], {z28\.b - z31\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8_z28_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8, z28, z0, fpm0), + svmla_za32_vg4x4_fpm (w8, z28, z0, fpm0)) + +/* +** mla_w8p1_z4_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p1_z4_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8 + 1, z4, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 + 1, z4, z0, fpm0)) + +/* +** mla_w8p2_z4_z0: +** add (w8|w9|w10|w11), w8, #?2 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p2_z4_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8 + 2, z4, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 + 2, z4, z0, fpm0)) + +/* +** mla_w11p4_z4_z0: +** msr fpmr, x1 +** fmlall za\.s\[w11, 4:7, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w11p4_z4_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w11 + 4, z4, z0, fpm0), + svmla_za32_vg4x4_fpm (w11 + 4, z4, z0, fpm0)) + +/* +** mla_w8p7_z4_z0: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p7_z4_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8 + 7, z4, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 + 7, z4, z0, fpm0)) + +/* +** mla_w8p8_z4_z4: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z4\.b - z7\.b} +** ret +*/ +TEST_ZA_XN (mla_w8p8_z4_z4, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8 + 8, z4, z4, fpm0), + svmla_za32_vg4x4_fpm (w8 + 8, z4, z4, fpm0)) + +/* +** mla_w8m1_z4_z0: +** sub (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z4\.b - z7\.b}, {z0\.b - z3\.b} +** ret +*/ +TEST_ZA_XN (mla_w8m1_z4_z0, svmfloat8x4_t, + svmla_za32_mf8_vg4x4_fpm (w8 - 1, z4, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 - 1, z4, z0, fpm0)) + +/* +** mla_single_0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), #?0 +** fmlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_0_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (0, z1, z0, fpm0), + svmla_za32_vg4x4_fpm (0, z1, z0, fpm0)) + +/* +** mla_single_w0_z1_z0: +** msr fpmr, x1 +** mov (w8|w9|w10|w11), w0 +** fmlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w0, z1, z0, fpm0), + svmla_za32_vg4x4_fpm (w0, z1, z0, fpm0)) + +/* +** mla_single_w8_z1_z0: +** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w8, z1, z0, fpm0), + svmla_za32_vg4x4_fpm (w8, z1, z0, fpm0)) + +/* +** mla_single_w8p1_z1_z0: +** add (w8|w9|w10|w11), w8, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE 
(mla_single_w8p1_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w8 + 1, z1, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 + 1, z1, z0, fpm0)) + +/* +** mla_single_w8p4_z20_z0: +** msr fpmr, x1 +** fmlall za\.s\[w8, 4:7, vgx4\], {z20\.b - z23\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p4_z20_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w8 + 4, z20, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 + 4, z20, z0, fpm0)) + +/* +** mla_single_w8p6_z27_z0: +** add (w8|w9|w10|w11), w8, #?6 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z27\.b - z30\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p6_z27_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w8 + 6, z27, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 + 6, z27, z0, fpm0)) + +/* +** mla_single_w8p7_z1_z0: +** add (w8|w9|w10|w11), w8, #?7 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w8 + 7, z1, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 + 7, z1, z0, fpm0)) + +/* +** mla_single_w8p8_z1_z0: +** add (w8|w9|w10|w11), w8, #?8 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w8 + 8, z1, z0, fpm0), + svmla_za32_vg4x4_fpm (w8 + 8, z1, z0, fpm0)) + +/* +** mla_single_w0m1_z1_z0: +** sub (w8|w9|w10|w11), w0, #?1 +** msr fpmr, x1 +** fmlall za\.s\[\1, 0:3, vgx4\], {z1\.b - z4\.b}, z0\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w0 - 1, z1, z0, fpm0), + svmla_za32_vg4x4_fpm (w0 - 1, z1, z0, fpm0)) + +/* +** mla_single_w8_z0_z15: +** str d15, \[sp, #?-16\]! 
+** msr fpmr, x1 +** fmlall za\.s\[w8, 0:3, vgx4\], {z0\.b - z3\.b}, z15\.b +** ldr d15, \[sp\], #?16 +** ret +*/ +TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w8, z0, z15, fpm0), + svmla_za32_vg4x4_fpm (w8, z0, z15, fpm0)) + +/* +** mla_single_w8_z20_z16: +** msr fpmr, x1 +** mov (z[0-7]).d, z16.d +** fmlall za\.s\[w8, 0:3, vgx4\], {z20\.b - z23\.b}, \1\.b +** ret +*/ +TEST_ZA_SINGLE (mla_single_w8_z20_z16, svmfloat8x4_t, svmfloat8_t, + svmla_single_za32_mf8_vg4x4_fpm (w8, z20, z16, fpm0), + svmla_za32_vg4x4_fpm (w8, z20, z16, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h index 8b982caf4384..ff237983ad93 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h @@ -4,7 +4,7 @@ #include "../../sme/acle-asm/test_sme_acle.h" #define TEST_ZA_X1(NAME, ZTYPE, CODE1, CODE2) \ - PROTO (NAME, void, (int w0)) \ + PROTO (NAME, void, (int w0, fpm_t fpm0)) \ { \ register int w7 __asm ("w7"); \ register int w8 __asm ("w8"); \ @@ -26,7 +26,7 @@ } #define TEST_ZA_XN(NAME, TTYPE, CODE1, CODE2) \ - PROTO (NAME, void, (int w0)) \ + PROTO (NAME, void, (int w0, fpm_t fpm0)) \ { \ register int w7 __asm ("w7"); \ register int w8 __asm ("w8"); \ @@ -68,7 +68,7 @@ } #define TEST_ZA_SINGLE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ - PROTO (NAME, void, (int w0)) \ + PROTO (NAME, void, (int w0, fpm_t fpm0)) \ { \ register int w8 __asm ("w8"); \ register int w11 __asm ("w11"); \ @@ -84,7 +84,7 @@ } #define TEST_ZA_SINGLE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ - PROTO (NAME, void, (int w0)) \ + PROTO (NAME, void, (int w0, fpm_t fpm0)) \ { \ register int w8 __asm ("w8"); \ register TTYPE z0 __asm ("z0"); \ @@ -94,7 +94,7 @@ } #define TEST_ZA_LANE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ - PROTO (NAME, void, (int w0)) \ + PROTO (NAME, void, (int w0, fpm_t fpm0)) \ { \ register int w8 __asm ("w8"); \ register int w11 __asm ("w11"); \ @@ -112,7 +112,7 @@ } #define TEST_ZA_LANE_Z15(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ - PROTO (NAME, void, (int w0)) \ + PROTO (NAME, void, (int w0, fpm_t fpm0)) \ { \ register int w8 __asm ("w8"); \ register TTYPE z4 __asm ("z4"); \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c index 2c60d50c6eda..885529804232 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_lane_1.c @@ -71,3 +71,17 @@ f4 (svint16_t s16, svuint16_t u16, svmla_lane_za64_vg4x1 (0, s64, s64, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svint64_t' arguments} } */ svmla_lane_za64_vg4x1 (0, u64, u64, 0); /* { dg-error {'svmla_lane_za64_vg4x1' has no form that takes 'svuint64_t' arguments} } */ } + +#pragma GCC target ("+sme-f8f32") + +f5 (svmfloat8_t mf8, + svmfloat8x2_t mf8x2, + double d, fpm_t fpm) + __arm_streaming __arm_inout("za") +{ + svmla_lane_za32_vg4x1_fpm (d, mf8, mf8, 0); /* { dg-error {too few arguments to function 'svmla_lane_za32_vg4x1_fpm'} } */ + svmla_lane_za32_vg4x1_fpm (d, mf8, mf8, 0, 0, fpm); /* { dg-error {too many arguments to function 'svmla_lane_za32_vg4x1_fpm'} } */ + svmla_lane_za32_vg4x1_fpm (d, mf8, mf8, 0, fpm); + svmla_lane_za32_vg4x1_fpm (d, mf8, mf8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 
'svmla_lane_za32_vg4x1_fpm', which expects a value in the range \[0, 15\]} } */ + svmla_lane_za32_vg4x1_fpm (d, mf8, mf8, 16, fpm); /* { dg-error {passing 16 to argument 4 of 'svmla_lane_za32_vg4x1_fpm', which expects a value in the range \[0, 15\]} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c index a361f7f5cb6b..b1d9a82916ac 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_slice_opt_single_1.c @@ -74,3 +74,19 @@ f4 (svint32x2_t s32x2, svuint32x2_t u32x2, svadd_write_za64_vg1x2 (1, s64x2, s64x2); svadd_write_za64_vg1x2 (1, u64x2, u64x2); } + +#pragma GCC target ("+sme-f8f16") + +void +f5 (svmfloat8x2_t mf8x2, svmfloat8_t mf8, + svfloat16x2_t f16x2, svfloat16_t f16, + fpm_t fpm) + __arm_streaming __arm_inout("za") +{ + svmla_single_za16_mf8_vg2x2_fpm (1, mf8x2, mf8); /* { dg-error {too few arguments to function 'svmla_single_za16_mf8_vg2x2_fpm'} } */ + svmla_single_za16_mf8_vg2x2_fpm (1, mf8x2, mf8, fpm); + svmla_single_za16_mf8_vg2x2_fpm (1, mf8x2, mf8, fpm, fpm); /* { dg-error {too many arguments to function 'svmla_single_za16_mf8_vg2x2_fpm'} } */ + svmla_single_za16_mf8_vg2x2_fpm (1, mf8x2, f16, fpm); /* { dg-error {incompatible type for argument 3 of 'svmla_single_za16_mf8_vg2x2_fpm'} } */ + svmla_single_za16_mf8_vg2x2_fpm (1, f16x2, mf8, fpm); /* { dg-error {incompatible type for argument 2 of 'svmla_single_za16_mf8_vg2x2_fpm'} } */ + svmla_single_za16_mf8_vg2x2_fpm (1, mf8x2, f16, fpm); /* { dg-error {incompatible type for argument 3 of 'svmla_single_za16_mf8_vg2x2_fpm'} } */ +}
