https://gcc.gnu.org/g:d965495317484ccd5a8eb3f0b27580c826feb5eb
commit r16-6379-gd965495317484ccd5a8eb3f0b27580c826feb5eb Author: Claudio Bantaloukas <[email protected]> Date: Wed Dec 24 11:41:25 2025 +0000 aarch64: add widening sme2 fp8 conversions This patch adds the following intrinsics (all __arm_streaming only) along with asm tests for them under the +sme2+fp8 flags: - svfloat16x2_t svcvt1_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) - svfloat16x2_t svcvt2_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) - svbfloat16x2_t svcvt1_bf16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) - svbfloat16x2_t svcvt2_bf16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) - svfloat16x2_t svcvtl1_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) - svfloat16x2_t svcvtl2_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) - svbfloat16x2_t svcvtl1_bf16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) - svbfloat16x2_t svcvtl2_bf16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) gcc/ * config/aarch64/aarch64-sve-builtins-sve2.cc (svcvtl1, svcvtl2): Added new FUNCTIONs. * config/aarch64/aarch64-sve-builtins-sve2.def (svcvt1, svcvt2, svcvtl1, svcvtl2): Added new DEF_SVE_FUNCTION_GS_FPM. * config/aarch64/aarch64-sve-builtins-sve2.h (svcvtl1, svcvtl2): Added new function_base. * config/aarch64/aarch64-sve-builtins.cc (function_resolver::resolve_unary): Use group_suffix_id when resolving C overloads. * config/aarch64/aarch64-sve2.md (@aarch64_sve2_fp8_cvt_<fp8_cvt_uns_op><mode>): Added new define_insn. * config/aarch64/aarch64.h (TARGET_SSME2_FP8): Added new define. * config/aarch64/iterators.md (UNSPEC_F1CVTL, UNSPEC_F2CVTL): Added new unspecs. (FP8CVT_UNS): Extended int_iterator. (fp8_cvt_uns_op): Likewise. gcc/testsuite/ * g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp: Use tuning flag to reduce churn in testsuites. * gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp: Likewise. * gcc.target/aarch64/sme2/acle-asm/cvt_mf8_x2.c: Added test file. * gcc.target/aarch64/sme2/acle-asm/cvtl_mf8_x2.c: Likewise. * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_X2_WIDE): Added fpm0 argument for intrinsics. 
Diff: --- gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 2 + gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 8 ++++ gcc/config/aarch64/aarch64-sve-builtins-sve2.h | 2 + gcc/config/aarch64/aarch64-sve-builtins.cc | 3 +- gcc/config/aarch64/aarch64-sve2.md | 10 +++++ gcc/config/aarch64/aarch64.h | 2 + gcc/config/aarch64/iterators.md | 6 +++ .../aarch64/sme2/aarch64-sme2-acle-asm.exp | 3 +- .../aarch64/sme2/aarch64-sme2-acle-asm.exp | 3 +- .../gcc.target/aarch64/sme2/acle-asm/cvt_mf8_x2.c | 47 ++++++++++++++++++++++ .../gcc.target/aarch64/sme2/acle-asm/cvtl_mf8_x2.c | 47 ++++++++++++++++++++++ .../aarch64/sve/acle/asm/test_sve_acle.h | 1 + 12 files changed, 131 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index 95c5ed81d610..ee392c3a745b 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -1020,6 +1020,8 @@ FUNCTION (svclamp, svclamp_impl,) FUNCTION (svcvt1, svcvt_fp8_impl, (UNSPEC_F1CVT)) FUNCTION (svcvt2, svcvt_fp8_impl, (UNSPEC_F2CVT)) FUNCTION (svcvtl, svcvtl_impl,) +FUNCTION (svcvtl1, svcvt_fp8_impl, (UNSPEC_F1CVTL)) +FUNCTION (svcvtl2, svcvt_fp8_impl, (UNSPEC_F2CVTL)) FUNCTION (svcvtlt1, svcvt_fp8_impl, (UNSPEC_F1CVTLT)) FUNCTION (svcvtlt2, svcvt_fp8_impl, (UNSPEC_F2CVTLT)) FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 7075e3ff1232..c271b97de87c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -418,3 +418,11 @@ DEF_SVE_FUNCTION_GS_FPM (svdot_lane, ternary_mfloat8_lane_group_selection, s_flo DEF_SVE_FUNCTION_GS_FPM (svdot, ternary_mfloat8, h_float_mf8, none, none, set) DEF_SVE_FUNCTION_GS_FPM (svdot_lane, ternary_mfloat8_lane_group_selection, h_float_mf8, none, none, set) #undef 
REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS \ + streaming_only (AARCH64_FL_SME2 | AARCH64_FL_FP8) +DEF_SVE_FUNCTION_GS_FPM (svcvt1, unary_convert, cvt_mf8, x2, none, set) +DEF_SVE_FUNCTION_GS_FPM (svcvt2, unary_convert, cvt_mf8, x2, none, set) +DEF_SVE_FUNCTION_GS_FPM (svcvtl1, unary_convert, cvt_mf8, x2, none, set) +DEF_SVE_FUNCTION_GS_FPM (svcvtl2, unary_convert, cvt_mf8, x2, none, set) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h index 6d7d0af26416..7c1745f5c9eb 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h @@ -64,6 +64,8 @@ namespace aarch64_sve extern const function_base *const svcvt1; extern const function_base *const svcvt2; extern const function_base *const svcvtl; + extern const function_base *const svcvtl1; + extern const function_base *const svcvtl2; extern const function_base *const svcvtlt; extern const function_base *const svcvtlt1; extern const function_base *const svcvtlt2; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index e8eeedb4d363..03481ee4a775 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -3207,7 +3207,8 @@ function_resolver::resolve_unary (type_class_index merge_tclass, /* Handle convert-like functions in which the first type suffix is explicit. 
*/ if (type_suffix_ids[0] != NUM_TYPE_SUFFIXES) - return resolve_to (mode_suffix_id, type_suffix_ids[0], type); + return resolve_to (mode_suffix_id, type_suffix_ids[0], type, + group_suffix_id); return resolve_to (mode_suffix_id, type); } diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 910918351829..ab8098d33278 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -3591,6 +3591,16 @@ [(set_attr "sve_type" "sve_fp_cvt")] ) +(define_insn "@aarch64_sve2_fp8_cvt_<fp8_cvt_uns_op><mode>" + [(set (match_operand:SVE_FULL_HFx2 0 "aligned_register_operand" "=Uw2") + (unspec:SVE_FULL_HFx2 + [(match_operand:VNx16QI 1 "register_operand" "w") + (reg:DI FPM_REGNUM)] + FP8CVT_UNS))] + "TARGET_SSME2_FP8" + "<b><fp8_cvt_uns_op>\t%0, %1.b" +) + ;; ------------------------------------------------------------------------- ;; ---- [FP<-FP] Multi-vector narrowing conversions ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 8e1018e4014e..e3eb807fb53e 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -538,6 +538,8 @@ through +ssve-fp8dot2. */ (TARGET_SVE2 && TARGET_FP8DOT2) || TARGET_STREAMING) \ && (AARCH64_HAVE_ISA(SSVE_FP8DOT2) || TARGET_NON_STREAMING)) +#define TARGET_SSME2_FP8 (TARGET_FP8 && TARGET_STREAMING_SME2) + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c5120c37afae..026c3101e38d 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1055,8 +1055,10 @@ UNSPEC_EORBT ; Used in aarch64-sve2.md. UNSPEC_EORTB ; Used in aarch64-sve2.md. UNSPEC_F1CVT ; Used in aarch64-sve2.md. + UNSPEC_F1CVTL ; Used in aarch64-sve2.md. UNSPEC_F1CVTLT ; Used in aarch64-sve2.md. UNSPEC_F2CVT ; Used in aarch64-sve2.md. 
+ UNSPEC_F2CVTL ; Used in aarch64-sve2.md. UNSPEC_F2CVTLT ; Used in aarch64-sve2.md. UNSPEC_FADDP ; Used in aarch64-sve2.md. UNSPEC_FCVTNB ; Used in aarch64-sve2.md. @@ -4044,6 +4046,8 @@ (define_int_iterator FP8CVT_UNS [UNSPEC_F1CVT UNSPEC_F2CVT + UNSPEC_F1CVTL + UNSPEC_F2CVTL UNSPEC_F1CVTLT UNSPEC_F2CVTLT]) @@ -5187,6 +5191,8 @@ (define_int_attr fp8_cvt_uns_op [(UNSPEC_F1CVT "f1cvt") (UNSPEC_F2CVT "f2cvt") + (UNSPEC_F1CVTL "f1cvtl") + (UNSPEC_F2CVTL "f2cvtl") (UNSPEC_F1CVTLT "f1cvtlt") (UNSPEC_F2CVTLT "f2cvtlt")]) diff --git a/gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp index 334b1108ddcd..b182f9d4a965 100644 --- a/gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp +++ b/gcc/testsuite/g++.target/aarch64/sme2/aarch64-sme2-acle-asm.exp @@ -39,7 +39,8 @@ if { [check_effective_target_aarch64_sme2] } { # Turn off any codegen tweaks by default that may affect expected assembly. # Tests relying on those should turn them on explicitly. -set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none" +# Reduce testsuite churn when writing to fmpr +set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none -moverride=tune=cheap_fpmr_write" global gcc_runtest_parallelize_limit_minor if { [info exists gcc_runtest_parallelize_limit_minor] } { diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp index 6bd8784779c6..256c484bb846 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp +++ b/gcc/testsuite/gcc.target/aarch64/sme2/aarch64-sme2-acle-asm.exp @@ -39,7 +39,8 @@ if { [check_effective_target_aarch64_sme2] } { # Turn off any codegen tweaks by default that may affect expected assembly. # Tests relying on those should turn them on explicitly. 
-set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none" +# Reduce testsuite churn when writing to fmpr +set sme2_flags "$sme2_flags -mtune=generic -moverride=tune=none -moverride=tune=cheap_fpmr_write" global gcc_runtest_parallelize_limit_minor if { [info exists gcc_runtest_parallelize_limit_minor] } { diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_x2.c new file mode 100644 index 000000000000..0fb20c8edf80 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_mf8_x2.c @@ -0,0 +1,47 @@ +/* { dg-do assemble { target { aarch64_asm_fp8_ok && aarch64_asm_sme2_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_fp8_ok && aarch64_asm_sme2_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+fp8" + +/* +** cvt1_f16_mf8_x2_fpm: +** msr fpmr, x0 +** f1cvt {z0\.h - z1\.h}, z0\.b +** ret +*/ +TEST_X2_WIDE (cvt1_f16_mf8_x2_fpm, svfloat16x2_t, svmfloat8_t, + z0_res = svcvt1_f16_mf8_x2_fpm (z0, fpm0), + z0_res = svcvt1_f16_x2_fpm (z0, fpm0)) + +/* +** cvt1_bf16_mf8_x2_fpm: +** msr fpmr, x0 +** bf1cvt {z0\.h - z1\.h}, z0\.b +** ret +*/ +TEST_X2_WIDE (cvt1_bf16_mf8_x2_fpm, svbfloat16x2_t, svmfloat8_t, + z0_res = svcvt1_bf16_mf8_x2_fpm (z0, fpm0), + z0_res = svcvt1_bf16_x2_fpm (z0, fpm0)) + +/* +** cvt2_f16_mf8_x2_fpm: +** msr fpmr, x0 +** f2cvt {z0\.h - z1\.h}, z0\.b +** ret +*/ +TEST_X2_WIDE (cvt2_f16_mf8_x2_fpm, svfloat16x2_t, svmfloat8_t, + z0_res = svcvt2_f16_mf8_x2_fpm (z0, fpm0), + z0_res = svcvt2_f16_x2_fpm (z0, fpm0)) + +/* +** cvt2_bf16_mf8_x2_fpm: +** msr fpmr, x0 +** bf2cvt {z0\.h - z1\.h}, z0\.b +** ret +*/ +TEST_X2_WIDE (cvt2_bf16_mf8_x2_fpm, svbfloat16x2_t, svmfloat8_t, + z0_res = svcvt2_bf16_mf8_x2_fpm (z0, fpm0), + z0_res = svcvt2_bf16_x2_fpm (z0, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_mf8_x2.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_mf8_x2.c new file mode 100644 index 000000000000..8a8326bd068c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_mf8_x2.c @@ -0,0 +1,47 @@ +/* { dg-do assemble { target { aarch64_asm_fp8_ok && aarch64_asm_sme2_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_fp8_ok && aarch64_asm_sme2_ok } } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sme2_acle.h" + +#pragma GCC target "+fp8" + +/* +** cvtl1_f16_mf8_x2_fpm: +** msr fpmr, x0 +** f1cvtl {z0\.h - z1\.h}, z0\.b +** ret +*/ +TEST_X2_WIDE (cvtl1_f16_mf8_x2_fpm, svfloat16x2_t, svmfloat8_t, + z0_res = svcvtl1_f16_mf8_x2_fpm (z0, fpm0), + z0_res = svcvtl1_f16_x2_fpm (z0, fpm0)) + +/* +** cvtl1_bf16_mf8_x2_fpm: +** msr fpmr, x0 +** bf1cvtl {z0\.h - z1\.h}, z0\.b +** ret +*/ +TEST_X2_WIDE (cvtl1_bf16_mf8_x2_fpm, svbfloat16x2_t, svmfloat8_t, + z0_res = svcvtl1_bf16_mf8_x2_fpm (z0, fpm0), + z0_res = svcvtl1_bf16_x2_fpm (z0, fpm0)) + +/* +** cvtl2_f16_mf8_x2_fpm: +** msr fpmr, x0 +** f2cvtl {z0\.h - z1\.h}, z0\.b +** ret +*/ +TEST_X2_WIDE (cvtl2_f16_mf8_x2_fpm, svfloat16x2_t, svmfloat8_t, + z0_res = svcvtl2_f16_mf8_x2_fpm (z0, fpm0), + z0_res = svcvtl2_f16_x2_fpm (z0, fpm0)) + +/* +** cvtl2_bf16_mf8_x2_fpm: +** msr fpmr, x0 +** bf2cvtl {z0\.h - z1\.h}, z0\.b +** ret +*/ +TEST_X2_WIDE (cvtl2_bf16_mf8_x2_fpm, svbfloat16x2_t, svmfloat8_t, + z0_res = svcvtl2_bf16_mf8_x2_fpm (z0, fpm0), + z0_res = svcvtl2_bf16_x2_fpm (z0, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h index 0adb39ad8b2b..7c156c4cf2a7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h @@ -767,6 +767,7 @@ #define TEST_X2_WIDE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ PROTO (NAME, void, ()) \ { \ + register fpm_t fpm0 __asm ("x0"); \ register ZTYPE z0 
__asm ("z0"); \ register ZTYPE z5 __asm ("z5"); \ register TTYPE z6 __asm ("z6"); \
