This patch introduces support for the LUTI2/LUTI4 ACLE intrinsics for SVE2. LUTI instructions perform efficient table lookups using packed 2-bit or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from the low 128 bits of the table vector using packed 2-bit indices, while LUTI4 can read from the low 128 or 256 bits of the table vector, or from two table vectors, using packed 4-bit indices. These instructions fill the destination vector by copying the table elements selected by the indices held in one segment of the source (index) vector; that segment is chosen by the immediate vector segment index.
The changes include the addition of a new AArch64 option extension "lut", the __ARM_FEATURE_LUT preprocessor macro, definitions for the new LUTI instruction shapes, and implementations of the svluti2_lane and svluti4_lane builtins. New tests are added as well. --- This is a respin of https://gcc.gnu.org/pipermail/gcc-patches/2025-January/672910.html. It addresses the review comments on lut; the faminmax comments will be addressed in a separate patch. Regression tested on aarch64-unknown-linux-gnu with no regressions found. Ok for master? Thanks, Saurabh --- gcc/config/aarch64/aarch64-c.cc | 2 + .../aarch64/aarch64-sve-builtins-shapes.cc | 44 +++++++++++++ .../aarch64/aarch64-sve-builtins-shapes.h | 2 + .../aarch64/aarch64-sve-builtins-sve2.cc | 17 +++++ .../aarch64/aarch64-sve-builtins-sve2.def | 8 +++ .../aarch64/aarch64-sve-builtins-sve2.h | 2 + gcc/config/aarch64/aarch64-sve-builtins.cc | 7 +- gcc/config/aarch64/aarch64-sve2.md | 33 ++++++++++ gcc/config/aarch64/iterators.md | 13 ++++ .../aarch64/sve/acle/asm/test_sve_acle.h | 16 +++++ .../aarch64/sve/acle/general-c/lut_1.c | 64 +++++++++++++++++++ .../aarch64/sve/acle/general-c/lut_2.c | 11 ++++ .../aarch64/sve/acle/general-c/lut_3.c | 56 ++++++++++++++++ .../aarch64/sve2/acle/asm/luti2_bf16.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti2_f16.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti2_s16.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti2_s8.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti2_u16.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti2_u8.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti4_bf16.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti4_bf16_x2.c | 30 +++++++++ .../aarch64/sve2/acle/asm/luti4_f16.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti4_f16_x2.c | 30 +++++++++ .../aarch64/sve2/acle/asm/luti4_s16.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti4_s16_x2.c | 30 +++++++++ .../aarch64/sve2/acle/asm/luti4_s8.c | 50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti4_u16.c | 
50 +++++++++++++++ .../aarch64/sve2/acle/asm/luti4_u16_x2.c | 30 +++++++++ .../aarch64/sve2/acle/asm/luti4_u8.c | 50 +++++++++++++++ gcc/testsuite/lib/target-supports.exp | 2 +- 30 files changed, 995 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_3.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index 92fcf5389a3..d1e2ab9831d 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -268,6 +268,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_SVE_BF16, "__ARM_FEATURE_SVE_BF16", pfile); + aarch64_def_or_undef (TARGET_LUT, "__ARM_FEATURE_LUT", pfile); + aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile); aarch64_def_or_undef (TARGET_FP8DOT2, "__ARM_FEATURE_FP8DOT2", pfile); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index ca721dd2c09..6fbbfdd05b7 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -903,6 +903,50 @@ struct load_ext_gather_base : public overloaded_base<1> } }; + +/* sv<v0>_t svlut[_<t0>_g](sv<t0>x<g>_t, svuint8_t, uint64_t) + where the final argument is a constant index, the instruction divides + the vector argument in BITS-bit quantities. */ +template<unsigned int BITS> +struct luti_base : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + /* Format: return type, table vector, indices vector, immediate value. */ + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,t0,vu8,su64", group, MODE_none); + } + + bool + check (function_checker &c) const override + { + auto max_range = c.type_suffix (0).element_bits / BITS - 1; + return c.require_immediate_range (2, 0, max_range); + } + + tree + resolve (function_resolver &r) const override + { + sve_type type; + if (!r.check_num_arguments (3) + || !(type = r.infer_sve_type (0)) + || !r.require_vector_type (1, VECTOR_TYPE_svuint8_t) + || !r.require_scalar_type (2, "uint64_t")) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } +}; + +/* Specializations for 2-bit and 4-bit indices. 
*/ +using luti2_def = luti_base<2>; +SHAPE (luti2) + +using luti4_def = luti_base<4>; +SHAPE (luti4) + + /* sv<t0>x<g>_t svfoo_t0_g(uint64_t, svuint8_t, uint64_t) where the first argument is the ZT register number (currently always 0) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h index 56e2f57b036..349eae6e34a 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -164,6 +164,8 @@ namespace aarch64_sve extern const function_shape *const load_gather64_vs_offset; extern const function_shape *const load_replicate; extern const function_shape *const load_za; + extern const function_shape *const luti2; + extern const function_shape *const luti4; extern const function_shape *const luti2_lane_zt; extern const function_shape *const luti4_lane_zt; extern const function_shape *const mmla; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index 0c5290411bb..d9922de7ca5 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -914,6 +914,21 @@ public: unsigned int m_base; }; +class svluti_lane_impl : public function_base +{ +public: + CONSTEXPR svluti_lane_impl (unsigned int bits) : m_bits (bits) + {} + + rtx expand (function_expander &e) const override + { + auto mode = e.tuple_mode (0); + return e.use_exact_insn (code_for_aarch64_sve_luti (m_bits, mode)); + } + + unsigned int m_bits; +}; + } /* end anonymous namespace */ namespace aarch64_sve { @@ -1205,5 +1220,7 @@ FUNCTION (svzip, multireg_permute, (UNSPEC_ZIP)) FUNCTION (svzipq, multireg_permute, (UNSPEC_ZIPQ)) FUNCTION (svzipq1, svzipq_impl, (0)) FUNCTION (svzipq2, svzipq_impl, (1)) +FUNCTION (svluti2_lane, svluti_lane_impl, (2)) +FUNCTION (svluti4_lane, svluti_lane_impl, (4)) } /* end namespace aarch64_sve */ diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index e726fa1fb68..0cd187cc2bd 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -336,6 +336,14 @@ DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz) DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS \ + sve_and_sme (AARCH64_FL_SVE2 | AARCH64_FL_LUT, \ + AARCH64_FL_SME2 | AARCH64_FL_LUT) +DEF_SVE_FUNCTION (svluti2_lane, luti2, bh_data, none) +DEF_SVE_FUNCTION (svluti4_lane, luti4, bh_data, none) +DEF_SVE_FUNCTION_GS (svluti4_lane, luti4, bh_data, x2, none) +#undef REQUIRED_EXTENSIONS + #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F16F16) DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_f32_f16, x2, none) DEF_SVE_FUNCTION_GS (svcvtl, unary_convertxn, cvt_f32_f16, x2, none) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h index 14d668d2649..6d7d0af2641 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h @@ -250,6 +250,8 @@ namespace aarch64_sve extern const function_base *const svzipq; extern const function_base *const svzipq1; extern const function_base *const svzipq2; + extern const function_base *const svluti2_lane; + extern const function_base *const svluti4_lane; } } diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index cf8ca89aefa..2c55c9481eb 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -283,7 +283,11 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_bhs_integer(S, D) \ TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D) -#define TYPES_bhs_data(S, D) \ +#define TYPES_bh_data(S, D) \ + TYPES_b_data (S, D), \ + TYPES_h_data (S, D) + +#define TYPES_bhs_data(S, D) \ TYPES_b_data (S, D), \ 
TYPES_h_data (S, D), \ TYPES_s_data (S, D) @@ -782,6 +786,7 @@ DEF_SVE_TYPES_ARRAY (bs_unsigned); DEF_SVE_TYPES_ARRAY (bhs_signed); DEF_SVE_TYPES_ARRAY (bhs_unsigned); DEF_SVE_TYPES_ARRAY (bhs_integer); +DEF_SVE_TYPES_ARRAY (bh_data); DEF_SVE_TYPES_ARRAY (bhs_data); DEF_SVE_TYPES_ARRAY (bhs_widen); DEF_SVE_TYPES_ARRAY (c); diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index f8cfe08f4c0..0a1dc314f94 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -133,6 +133,7 @@ ;; ---- Optional AES extensions ;; ---- Optional SHA-3 extensions ;; ---- Optional SM4 extensions +;; ---- Table lookup ;; ========================================================================= ;; == Moves @@ -4211,3 +4212,35 @@ "sm4ekey\t%0.s, %1.s, %2.s" [(set_attr "type" "crypto_sm4")] ) + +;; ------------------------------------------------------------------------- +;; ---- Table lookup +;; ------------------------------------------------------------------------- +;; Includes: +;; - LUTI2 +;; - LUTI4 +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" + [(set (match_operand:SVE_FULL_BH 0 "register_operand" "=w") + (unspec:SVE_FULL_BH + [(match_operand:SVE_FULL_BH 1 "register_operand" "w") + (match_operand:VNx16QI 2 "register_operand" "w") + (match_operand:DI 3 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SVE_LUTI))] + "TARGET_LUT && TARGET_SVE2_OR_SME2" + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]" +) + +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>" + [(set (match_operand:<VSINGLE> 0 "register_operand" "=w") + (unspec:<VSINGLE> + [(match_operand:SVE_FULL_Hx2 1 "register_operand" "Uw2") + (match_operand:VNx16QI 2 "register_operand" "w") + (match_operand:DI 3 "const_int_operand") + (const_int LUTI_BITS)] + UNSPEC_SVE_LUTI))] + "TARGET_LUT && TARGET_SVE2_OR_SME2" + "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]" +) diff 
--git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index ff0f34dd043..0fbf96f1ab9 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -553,6 +553,18 @@ (define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI VNx8BF VNx8HF VNx4SF]) +;; Fully-packed SVE vector byte modes that have 32-bit or smaller elements. +(define_mode_iterator SVE_FULL_BS [VNx16QI VNx4SI VNx4SF]) + +;; Fully-packed SVE vector byte modes that have 16-bit or smaller elements. +(define_mode_iterator SVE_FULL_BH [VNx16QI VNx8HI VNx8HF VNx8BF]) + +;; Fully-packed half word SVE vector modes +(define_mode_iterator SVE_FULL_H [VNx8HI VNx8HF VNx8BF]) + +;; Pairs of fully-packed SVE vector modes (half word only) +(define_mode_iterator SVE_FULL_Hx2 [VNx16HI VNx16HF VNx16BF]) + ;; Fully-packed SVE vector modes that have 32-bit elements. (define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF]) @@ -1186,6 +1198,7 @@ UNSPEC_UZPQ2 UNSPEC_ZIPQ1 UNSPEC_ZIPQ2 + UNSPEC_SVE_LUTI ;; All used in aarch64-sme.md UNSPEC_SME_ADD diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h index d3ae707ac49..c0dd89fa924 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h @@ -780,4 +780,20 @@ "w" (z16), "w" (z22), "w" (z29)); \ } +#define TEST_1X2_NARROW(NAME, RTYPE, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO(NAME, void, ()) \ + { \ + register RTYPE z0 __asm ("z0"); \ + register ZTYPE z5 __asm ("z5"); \ + register TTYPE z6 __asm ("z6"); \ + register RTYPE z16 __asm ("z16"); \ + register ZTYPE z22 __asm ("z22"); \ + register TTYPE z29 __asm ("z29"); \ + register RTYPE z0_res __asm ("z0"); \ + __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z6), \ + "=w" (z16), "=w" (z22), "=w" (z29)); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z22)); \ + } + #endif diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_1.c new file mode 100644 index 00000000000..142de490267 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_1.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv9.2-a+sve2+lut") + +void +test (svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, + svfloat16x2_t f16x2, svfloat32x2_t f32x2, svfloat64x2_t f64x2, + svuint8_t u8, svuint16_t u16, svuint32_t u32, svuint64_t u64, + svuint8x2_t u8x2, svuint16x2_t u16x2, + svuint32x2_t u32x2, svuint64x2_t u64x2, + svint8_t s8, svint16_t s16, svint32_t s32, svint64_t s64, + svint8x2_t s8x2, svint16x2_t s16x2, svint32x2_t s32x2, svint64x2_t s64x2, + svbfloat16_t bf16, svbfloat16x2_t bf16x2) +{ + svluti2_lane (f16, u8, 0); + svluti2_lane (bf16, u8, 0); + + svluti2_lane (f32, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svfloat32_t' arguments} } */ + svluti2_lane (f64, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svfloat64_t' arguments} } */ + + svluti2_lane (u8, u8, 0); + svluti2_lane (u16, u8, 0); + + svluti2_lane (u32, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svuint32_t' arguments} } */ + svluti2_lane (u64, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svuint64_t' arguments} } */ + + svluti2_lane (s8, u8, 0); + svluti2_lane (s16, u8, 0); + + svluti2_lane (s32, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svint32_t' arguments} } */ + svluti2_lane (s64, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svint64_t' arguments} } */ + + svluti4_lane (f16, u8, 0); + svluti4_lane (bf16, u8, 0); + svluti4_lane_x2 (f16x2, u8, 0); + svluti4_lane_x2 (bf16x2, u8, 0); + + svluti4_lane (f32, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svfloat32_t' arguments} } */ + svluti4_lane (f64, u8, 0); /* { dg-error {'svluti4_lane' has no form 
that takes 'svfloat64_t' arguments} } */ + svluti4_lane_x2 (f32x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svfloat32x2_t' arguments} } */ + svluti4_lane_x2 (f64x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svfloat64x2_t' arguments} } */ + + svluti4_lane (u8, u8, 0); + svluti4_lane (u16, u8, 0); + svluti4_lane_x2 (u8x2, u8, 0); + svluti4_lane_x2 (u16x2, u8, 0); + + svluti4_lane (u32, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svuint32_t' arguments} } */ + svluti4_lane (u64, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svuint64_t' arguments} } */ + svluti4_lane_x2 (u32x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svuint32x2_t' arguments} } */ + svluti4_lane_x2 (u64x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svuint64x2_t' arguments} } */ + + svluti4_lane (s8, u8, 0); + svluti4_lane (s16, u8, 0); + svluti4_lane_x2 (s8x2, u8, 0); + svluti4_lane_x2 (s16x2, u8, 0); + + svluti4_lane (s32, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svint32_t' arguments} } */ + svluti4_lane (s64, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svint64_t' arguments} } */ + svluti4_lane_x2 (s32x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svint32x2_t' arguments} } */ + svluti4_lane_x2 (s64x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svint64x2_t' arguments} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_2.c new file mode 100644 index 00000000000..3c0664df943 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv9.2-a+sve2") + +void +test (svfloat16_t f16, svuint8_t u8) +{ + svluti2_lane (f16, u8, 0); /* { dg-error {ACLE function 'svluti2_lane_f16' requires ISA extension 
'lut'} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_3.c new file mode 100644 index 00000000000..d35fda9e5b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/lut_3.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv9.2-a+sve2+lut") + +void +test (svfloat16_t f16, svfloat16x2_t f16x2, + svuint8_t u8, svuint16_t u16, svuint8x2_t u8x2, svuint16x2_t u16x2, + svint8_t s8, svint16_t s16, svint8x2_t s8x2, svint16x2_t s16x2, + svbfloat16_t bf16, svbfloat16x2_t bf16x2) +{ + svluti2_lane (f16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */ + svluti2_lane (f16, u8, 8); /* { dg-error {passing 8 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */ + + svluti2_lane (bf16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */ + svluti2_lane (bf16, u8, 8); /* { dg-error {passing 8 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */ + + svluti2_lane (u8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 3\]} } */ + svluti2_lane (u8, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 3\]} } */ + svluti2_lane (u16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */ + svluti2_lane (u16, u8, 8); /* { dg-error {passing 8 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */ + + svluti2_lane (s8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 3\]} } */ + svluti2_lane (s8, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti2_lane', which expects a value in the 
range \[0, 3\]} } */ + svluti2_lane (s16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */ + svluti2_lane (s16, u8, 8); /* { dg-error {passing 8 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */ + + svluti4_lane (f16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */ + svluti4_lane (f16, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */ + svluti4_lane_x2 (f16x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */ + svluti4_lane_x2 (f16x2, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */ + + svluti4_lane (bf16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */ + svluti4_lane (bf16, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */ + svluti4_lane_x2 (bf16x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */ + svluti4_lane_x2 (bf16x2, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */ + + svluti4_lane (u8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 1\]} } */ + svluti4_lane (u8, u8, 2); /* { dg-error {passing 2 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 1\]} } */ + svluti4_lane (u16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */ + svluti4_lane (u16, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */ + 
svluti4_lane_x2 (u8x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 1\]} } */ + svluti4_lane_x2 (u8x2, u8, 2); /* { dg-error {passing 2 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 1\]} } */ + svluti4_lane_x2 (u16x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */ + svluti4_lane_x2 (u16x2, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */ + + svluti4_lane (s8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 1\]} } */ + svluti4_lane (s8, u8, 2); /* { dg-error {passing 2 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 1\]} } */ + svluti4_lane (s16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */ + svluti4_lane (s16, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */ + svluti4_lane_x2 (s8x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 1\]} } */ + svluti4_lane_x2 (s8x2, u8, 2); /* { dg-error {passing 2 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 1\]} } */ + svluti4_lane_x2 (s16x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */ + svluti4_lane_x2 (s16x2, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c new file mode 100644 index 00000000000..f25c2f8bb45 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c @@ -0,0 +1,50 
@@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti2_min_idx_test: +** luti2 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_min_idx_test, svbfloat16_t, svuint8_t, z1, + svluti2_lane_bf16 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_max_idx_test: +** luti2 z1\.h, \{ z28\.h \}, z0\[7\] +** ret +*/ + +TEST_XN_SINGLE (luti2_max_idx_test, svbfloat16_t, svuint8_t, z1, + svluti2_lane_bf16 (z28, z0, 7), + svluti2_lane (z28, z0, 7)) + +/* +** luti2_tied_min_idx_test: +** luti2 z28\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_min_idx_test, svbfloat16_t, svuint8_t, z28, + svluti2_lane_bf16 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_tied_max_idx_test: +** luti2 z28\.h, \{ z28\.h \}, z0\[7\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_max_idx_test, svbfloat16_t, svuint8_t, z28, + svluti2_lane_bf16 (z28, z0, 7), + svluti2_lane (z28, z0, 7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c new file mode 100644 index 00000000000..39c814c08f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti2_min_idx_test: +** luti2 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_min_idx_test, svfloat16_t, svuint8_t, z1, + svluti2_lane_f16 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_max_idx_test: +** luti2 z1\.h, \{ z28\.h \}, z0\[7\] +** ret +*/ + +TEST_XN_SINGLE (luti2_max_idx_test, svfloat16_t, svuint8_t, z1, + svluti2_lane_f16 (z28, z0, 7), + svluti2_lane (z28, z0, 7)) + +/* +** luti2_tied_min_idx_test: +** luti2 z28\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_min_idx_test, svfloat16_t, svuint8_t, z28, + svluti2_lane_f16 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_tied_max_idx_test: +** luti2 z28\.h, \{ z28\.h \}, z0\[7\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_max_idx_test, svfloat16_t, svuint8_t, z28, + svluti2_lane_f16 (z28, z0, 7), + svluti2_lane (z28, z0, 7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c new file mode 100644 index 00000000000..5f4bc0d488b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti2_min_idx_test: +** luti2 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_min_idx_test, svint16_t, svuint8_t, z1, + svluti2_lane_s16 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_max_idx_test: +** luti2 z1\.h, \{ z28\.h \}, z0\[7\] +** ret +*/ + +TEST_XN_SINGLE (luti2_max_idx_test, svint16_t, svuint8_t, z1, + svluti2_lane_s16 (z28, z0, 7), + svluti2_lane (z28, z0, 7)) + +/* +** luti2_tied_min_idx_test: +** luti2 z28\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_min_idx_test, svint16_t, svuint8_t, z28, + svluti2_lane_s16 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_tied_max_idx_test: +** luti2 z28\.h, \{ z28\.h \}, z0\[7\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_max_idx_test, svint16_t, svuint8_t, z28, + svluti2_lane_s16 (z28, z0, 7), + svluti2_lane (z28, z0, 7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c new file mode 100644 index 00000000000..f923296f44b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti2_min_idx_test: +** luti2 z1\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_min_idx_test, svint8_t, svuint8_t, z1, + svluti2_lane_s8 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_max_idx_test: +** luti2 z1\.b, \{ z28\.b \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti2_max_idx_test, svint8_t, svuint8_t, z1, + svluti2_lane_s8 (z28, z0, 3), + svluti2_lane (z28, z0, 3)) + +/* +** luti2_tied_min_idx_test: +** luti2 z28\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_min_idx_test, svint8_t, svuint8_t, z28, + svluti2_lane_s8 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_tied_max_idx_test: +** luti2 z28\.b, \{ z28\.b \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_max_idx_test, svint8_t, svuint8_t, z28, + svluti2_lane_s8 (z28, z0, 3), + svluti2_lane (z28, z0, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c new file mode 100644 index 00000000000..bb9c3e098f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti2_min_idx_test: +** luti2 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_min_idx_test, svuint16_t, svuint8_t, z1, + svluti2_lane_u16 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_max_idx_test: +** luti2 z1\.h, \{ z28\.h \}, z0\[7\] +** ret +*/ + +TEST_XN_SINGLE (luti2_max_idx_test, svuint16_t, svuint8_t, z1, + svluti2_lane_u16 (z28, z0, 7), + svluti2_lane (z28, z0, 7)) + +/* +** luti2_tied_min_idx_test: +** luti2 z28\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_min_idx_test, svuint16_t, svuint8_t, z28, + svluti2_lane_u16 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_tied_max_idx_test: +** luti2 z28\.h, \{ z28\.h \}, z0\[7\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_max_idx_test, svuint16_t, svuint8_t, z28, + svluti2_lane_u16 (z28, z0, 7), + svluti2_lane (z28, z0, 7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c new file mode 100644 index 00000000000..895d850aaa6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti2_min_idx_test: +** luti2 z1\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_min_idx_test, svuint8_t, svuint8_t, z1, + svluti2_lane_u8 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_max_idx_test: +** luti2 z1\.b, \{ z28\.b \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti2_max_idx_test, svuint8_t, svuint8_t, z1, + svluti2_lane_u8 (z28, z0, 3), + svluti2_lane (z28, z0, 3)) + +/* +** luti2_tied_min_idx_test: +** luti2 z28\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_min_idx_test, svuint8_t, svuint8_t, z28, + svluti2_lane_u8 (z28, z0, 0), + svluti2_lane (z28, z0, 0)) + +/* +** luti2_tied_max_idx_test: +** luti2 z28\.b, \{ z28\.b \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti2_tied_max_idx_test, svuint8_t, svuint8_t, z28, + svluti2_lane_u8 (z28, z0, 3), + svluti2_lane (z28, z0, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c new file mode 100644 index 00000000000..b005ba6ca47 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_min_idx_test, svbfloat16_t, svuint8_t, z1, + svluti4_lane_bf16 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_max_idx_test: +** luti4 z1\.h, \{ z28\.h \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti4_max_idx_test, svbfloat16_t, svuint8_t, z1, + svluti4_lane_bf16 (z28, z0, 3), + svluti4_lane (z28, z0, 3)) + +/* +** luti4_tied_min_idx_test: +** luti4 z28\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_min_idx_test, svbfloat16_t, svuint8_t, z28, + svluti4_lane_bf16 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_tied_max_idx_test: +** luti4 z28\.h, \{ z28\.h \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_max_idx_test, svbfloat16_t, svuint8_t, z28, + svluti4_lane_bf16 (z28, z0, 3), + svluti4_lane (z28, z0, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c new file mode 100644 index 00000000000..1f3f8aab5ef --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c @@ -0,0 +1,30 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[0\] +** ret +*/ + +TEST_1X2_NARROW(luti4_min_idx_test, svbfloat16_t, svbfloat16x2_t, svuint8_t, + z0_res = svluti4_lane_bf16_x2 (z6, z5, 0), + z0_res = svluti4_lane_x2 (z6, z5, 0)) + +/* +** luti4_max_idx_test: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[3\] +** ret +*/ + +TEST_1X2_NARROW(luti4_max_idx_test, svbfloat16_t, svbfloat16x2_t, svuint8_t, + z0_res = svluti4_lane_bf16_x2 (z6, z5, 3), + z0_res = svluti4_lane_x2 (z6, z5, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c new file mode 100644 index 00000000000..644c0b425c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_min_idx_test, svfloat16_t, svuint8_t, z1, + svluti4_lane_f16 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_max_idx_test: +** luti4 z1\.h, \{ z28\.h \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti4_max_idx_test, svfloat16_t, svuint8_t, z1, + svluti4_lane_f16 (z28, z0, 3), + svluti4_lane (z28, z0, 3)) + +/* +** luti4_tied_min_idx_test: +** luti4 z28\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_min_idx_test, svfloat16_t, svuint8_t, z28, + svluti4_lane_f16 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_tied_max_idx_test: +** luti4 z28\.h, \{ z28\.h \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_max_idx_test, svfloat16_t, svuint8_t, z28, + svluti4_lane_f16 (z28, z0, 3), + svluti4_lane (z28, z0, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c new file mode 100644 index 00000000000..041f7fff1e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c @@ -0,0 +1,30 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[0\] +** ret +*/ + +TEST_1X2_NARROW(luti4_min_idx_test, svfloat16_t, svfloat16x2_t, svuint8_t, + z0_res = svluti4_lane_f16_x2 (z6, z5, 0), + z0_res = svluti4_lane_x2 (z6, z5, 0)) + +/* +** luti4_max_idx_test: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[3\] +** ret +*/ + +TEST_1X2_NARROW(luti4_max_idx_test, svfloat16_t, svfloat16x2_t, svuint8_t, + z0_res = svluti4_lane_f16_x2 (z6, z5, 3), + z0_res = svluti4_lane_x2 (z6, z5, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c new file mode 100644 index 00000000000..ad35a214549 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_min_idx_test, svint16_t, svuint8_t, z1, + svluti4_lane_s16 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_max_idx_test: +** luti4 z1\.h, \{ z28\.h \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti4_max_idx_test, svint16_t, svuint8_t, z1, + svluti4_lane_s16 (z28, z0, 3), + svluti4_lane (z28, z0, 3)) + +/* +** luti4_tied_min_idx_test: +** luti4 z28\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_min_idx_test, svint16_t, svuint8_t, z28, + svluti4_lane_s16 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_tied_max_idx_test: +** luti4 z28\.h, \{ z28\.h \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_max_idx_test, svint16_t, svuint8_t, z28, + svluti4_lane_s16 (z28, z0, 3), + svluti4_lane (z28, z0, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c new file mode 100644 index 00000000000..96c9a0d48de --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c @@ -0,0 +1,30 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[0\] +** ret +*/ + +TEST_1X2_NARROW(luti4_min_idx_test, svint16_t, svint16x2_t, svuint8_t, + z0_res = svluti4_lane_s16_x2 (z6, z5, 0), + z0_res = svluti4_lane_x2 (z6, z5, 0)) + +/* +** luti4_max_idx_test: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[3\] +** ret +*/ + +TEST_1X2_NARROW(luti4_max_idx_test, svint16_t, svint16x2_t, svuint8_t, + z0_res = svluti4_lane_s16_x2 (z6, z5, 3), + z0_res = svluti4_lane_x2 (z6, z5, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c new file mode 100644 index 00000000000..b6f8a5c7c4a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z1\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_min_idx_test, svint8_t, svuint8_t, z1, + svluti4_lane_s8 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_max_idx_test: +** luti4 z1\.b, \{ z28\.b \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_max_idx_test, svint8_t, svuint8_t, z1, + svluti4_lane_s8 (z28, z0, 1), + svluti4_lane (z28, z0, 1)) + +/* +** luti4_tied_min_idx_test: +** luti4 z28\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_min_idx_test, svint8_t, svuint8_t, z28, + svluti4_lane_s8 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_tied_max_idx_test: +** luti4 z28\.b, \{ z28\.b \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_max_idx_test, svint8_t, svuint8_t, z28, + svluti4_lane_s8 (z28, z0, 1), + svluti4_lane (z28, z0, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c new file mode 100644 index 00000000000..92138bd7b41 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z1\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_min_idx_test, svuint16_t, svuint8_t, z1, + svluti4_lane_u16 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_max_idx_test: +** luti4 z1\.h, \{ z28\.h \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti4_max_idx_test, svuint16_t, svuint8_t, z1, + svluti4_lane_u16 (z28, z0, 3), + svluti4_lane (z28, z0, 3)) + +/* +** luti4_tied_min_idx_test: +** luti4 z28\.h, \{ z28\.h \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_min_idx_test, svuint16_t, svuint8_t, z28, + svluti4_lane_u16 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_tied_max_idx_test: +** luti4 z28\.h, \{ z28\.h \}, z0\[3\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_max_idx_test, svuint16_t, svuint8_t, z28, + svluti4_lane_u16 (z28, z0, 3), + svluti4_lane (z28, z0, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c new file mode 100644 index 00000000000..4de2234d9af --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c @@ -0,0 +1,30 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[0\] +** ret +*/ + +TEST_1X2_NARROW(luti4_min_idx_test, svuint16_t, svuint16x2_t, svuint8_t, + z0_res = svluti4_lane_u16_x2 (z6, z5, 0), + z0_res = svluti4_lane_x2 (z6, z5, 0)) + +/* +** luti4_max_idx_test: +** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[3\] +** ret +*/ + +TEST_1X2_NARROW(luti4_max_idx_test, svuint16_t, svuint16x2_t, svuint8_t, + z0_res = svluti4_lane_u16_x2 (z6, z5, 3), + z0_res = svluti4_lane_x2 (z6, z5, 3)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c new file mode 100644 index 00000000000..e1e9523e375 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c @@ -0,0 +1,50 @@ +/* { dg-do assemble { target aarch64_asm_lut_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_lut_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2+lut" +#if STREAMING_COMPATIBLE +#pragma GCC target "+sme2" +#endif + +/* +** luti4_min_idx_test: +** luti4 z1\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_min_idx_test, svuint8_t, svuint8_t, z1, + svluti4_lane_u8 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_max_idx_test: +** luti4 z1\.b, \{ z28\.b \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_max_idx_test, svuint8_t, svuint8_t, z1, + svluti4_lane_u8 (z28, z0, 1), + svluti4_lane (z28, z0, 1)) + +/* +** luti4_tied_min_idx_test: +** luti4 z28\.b, \{ z28\.b \}, z0\[0\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_min_idx_test, svuint8_t, svuint8_t, z28, + svluti4_lane_u8 (z28, z0, 0), + svluti4_lane (z28, z0, 0)) + +/* +** luti4_tied_max_idx_test: +** luti4 z28\.b, \{ z28\.b \}, z0\[1\] +** ret +*/ + +TEST_XN_SINGLE (luti4_tied_max_idx_test, svuint8_t, svuint8_t, z28, + svluti4_lane_u8 (z28, z0, 1), + svluti4_lane (z28, z0, 1)) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 939ef3a4119..a8f87b66cd7 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -12304,7 +12304,7 @@ proc check_effective_target_aarch64_tiny { } { foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64" - "sme" "sme-i16i64" "sme2" "sve-b16b16" + "lut" "sme" "sme-i16i64" "sme2" "sve-b16b16" "sme-b16b16" "sme-f16f16" "sme2p1" "fp8" "fp8fma" "ssve-fp8fma" "fp8dot2" "ssve-fp8dot2" "fp8dot4" "ssve-fp8dot4"} {