The AArch64 FEAT_LUT extension is optional from Armv9.2-A and mandatory from Armv9.5-A. It introduces instructions that read a lookup table using packed 2-bit indices.
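For context, a rough scalar model of what a LUTI2 byte lookup computes is sketched below. It is only illustrative (my reading of the FEAT_LUT description, not code taken from this patch); the Arm ARM pseudocode is authoritative. The "segment" parameter plays the role of the immediate lane index passed to the new intrinsics, and all names here are invented for exposition.

```c
#include <stdint.h>

/* Illustrative model only: each destination byte is selected from the
   table by a packed 2-bit index taken from the chosen 32-bit segment of
   the index vector.  The exact element/bit numbering is an assumption;
   see the Arm ARM for the authoritative LUTI2 pseudocode.  */
static void
luti2_byte_model (uint8_t dst[16], const uint8_t table[16],
                  const uint8_t indices[16], int segment)
{
  for (int e = 0; e < 16; e++)
    {
      int bit = segment * 32 + e * 2;                 /* element e's 2-bit index */
      int idx = (indices[bit / 8] >> (bit % 8)) & 0x3;
      dst[e] = table[idx];                            /* index selects a table entry */
    }
}
```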
This patch adds AdvSIMD LUT intrinsics for LUTI2, supporting table lookups
with 2-bit packed indices.  The following intrinsics are added:

* vluti2{q}_lane{q}_u8
* vluti2{q}_lane{q}_s8
* vluti2{q}_lane{q}_p8
* vluti2{q}_lane{q}_u16
* vluti2{q}_lane{q}_s16
* vluti2{q}_lane{q}_p16
* vluti2{q}_lane{q}_f16
* vluti2{q}_lane{q}_bf16

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.cc (aarch64_builtin_signatures):
	Add binary_lane shape.
	(aarch64_fntype): Handle the binary_lane shape.
	(aarch64_expand_pragma_builtin): Distinguish and expand binary and
	binary lane-based intrinsics.
	* config/aarch64/aarch64-option-extensions.def
	(AARCH64_OPT_EXTENSION): Add LUT feature flag.
	* config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_LANE): New
	macro for lane-based intrinsics.
	(ENTRY_VLANEIU): New macro for LUTI lanes (unsigned).
	(ENTRY_VLANEIS): New macro for LUTI lanes (signed).
	(ENTRY_VLANEP): New macro for LUTI lanes (poly).
	(ENTRY_VLANEF): New macro for LUTI lanes (float).
	(ENTRY_VLANEBF): New macro for LUTI lanes (bfloat).
	(REQUIRED_EXTENSIONS): Set per LUTI requirements.
	* config/aarch64/aarch64-simd.md
	(@aarch64_<vluti_uns_op><VLUT1:mode><VLUT2:mode>): Add instruction
	pattern for the LUTI2 instructions.
	* config/aarch64/aarch64.h (TARGET_LUT): Add TARGET_LUT macro for
	enabling LUT extension support.
	* config/aarch64/iterators.md (VLUT1, VLUT2, VLUT_UNS, vluti_uns_op,
	Vconq): New iterators and attributes for the LUTI2 operations.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/vluti-builtins.c: New test.
---
 gcc/config/aarch64/aarch64-builtins.cc        |  22 +-
 .../aarch64/aarch64-option-extensions.def     |   2 +
 .../aarch64/aarch64-simd-pragma-builtins.def  |  61 ++++
 gcc/config/aarch64/aarch64-simd.md            |  10 +
 gcc/config/aarch64/aarch64.h                  |   4 +
 gcc/config/aarch64/iterators.md               |  25 ++
 .../gcc.target/aarch64/simd/vluti-builtins.c  | 329 ++++++++++++++++++
 7 files changed, 452 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vluti-builtins.c
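As a usage illustration (not part of the patch), code along these lines is expected to compile to a single LUTI2 instruction once the intrinsics below are available; the function name and the 2-bit-weight framing are hypothetical, while the intrinsic signature matches the new test file.

```c
#include <arm_neon.h>

/* Hypothetical example: expand the first sixteen 2-bit indices packed in
   "packed" into bytes taken from the low entries of "table".
   Build with -march=armv9-a+lut.  */
uint8x16_t
expand_2bit_indices (uint8x16_t table, uint8x16_t packed)
{
  return vluti2q_laneq_u8 (table, packed, 0);
}
```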
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index ad82c680c6a..65813091f23 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1591,6 +1591,7 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma)
 enum class aarch64_builtin_signatures
 {
   binary,
+  binary_lane,
 };
 
 namespace {
@@ -1656,6 +1657,7 @@ static tree
 aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
 {
   tree type0, type1, type2;
+  tree immtype = aarch64_simd_builtin_type (SImode, qualifier_lane_index);
 
   switch (builtin_data.signature)
     {
@@ -1668,6 +1670,16 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
 					 builtin_data.types[2].qualifiers);
       return build_function_type_list (type0, type1, type2, NULL_TREE);
 
+    case aarch64_builtin_signatures::binary_lane:
+      type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
+					 builtin_data.types[0].qualifiers);
+      type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
+					 builtin_data.types[1].qualifiers);
+      type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode,
+					 builtin_data.types[2].qualifiers);
+      return build_function_type_list (type0, type1, type2,
+				       immtype, NULL_TREE);
+
     default:
       gcc_unreachable ();
     }
@@ -3383,7 +3395,7 @@ static rtx
 aarch64_expand_pragma_builtin (tree exp, rtx target,
 			       const aarch64_pragma_builtins_data *builtin_data)
 {
-  expand_operand ops[3];
+  expand_operand ops[4];
   auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
   auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
   create_output_operand (&ops[0], target, builtin_data->types[0].mode);
@@ -3399,6 +3411,14 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
       icode = code_for_aarch64 (unspec, builtin_data->types[0].mode);
       expand_insn (icode, 3, ops);
       break;
+    case aarch64_builtin_signatures::binary_lane:
+      rtx op3;
+      op3 = expand_normal (CALL_EXPR_ARG (exp, 2));
+      create_input_operand (&ops[3], op3, SImode);
+      icode = code_for_aarch64 (unspec,
+				builtin_data->types[1].mode, builtin_data->types[2].mode);
+      expand_insn (icode, 4, ops);
+      break;
     default:
       gcc_unreachable();
     }
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 8279f5a76ea..e3cafd90df9 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -236,6 +236,8 @@ AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8")
 
 AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
 
+AARCH64_OPT_EXTENSION("lut", LUT, (SIMD), (), (), "lut")
+
 #undef AARCH64_OPT_FMV_EXTENSION
 #undef AARCH64_OPT_EXTENSION
 #undef AARCH64_FMV_FEATURE
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index c669919fa04..5ae36673292 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -31,8 +31,69 @@
   ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, f32q, UNSPEC) \
   ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, f64q, UNSPEC)
 
+#undef ENTRY_LANE
+#define ENTRY_LANE(N, S, T0, T1, T2, U) \
+  ENTRY (N, S, T0, T1, T2, u8, U)
+
+#undef ENTRY_VLANEIU
+#define ENTRY_VLANEIU(N, S, U) \
+  ENTRY_LANE (N##_lane_u8, S, u8q, u8, u8, U) \
+  ENTRY_LANE (N##_laneq_u8, S, u8q, u8, u8q, U) \
+  ENTRY_LANE (N##q_lane_u8, S, u8q, u8q, u8, U) \
+  ENTRY_LANE (N##q_laneq_u8, S, u8q, u8q, u8q, U) \
+  ENTRY_LANE (N##_lane_u16, S, u16q, u16, u8, U) \
+  ENTRY_LANE (N##_laneq_u16, S, u16q, u16, u8q, U) \
+  ENTRY_LANE (N##q_lane_u16, S, u16q, u16q, u8, U) \
+  ENTRY_LANE (N##q_laneq_u16, S, u16q, u16q, u8q, U)
+
+#undef ENTRY_VLANEIS
+#define ENTRY_VLANEIS(N, S, U) \
+  ENTRY_LANE (N##_lane_s8, S, s8q, s8, u8, U) \
+  ENTRY_LANE (N##_laneq_s8, S, s8q, s8, u8q, U) \
+  ENTRY_LANE (N##q_lane_s8, S, s8q, s8q, u8, U) \
+  ENTRY_LANE (N##q_laneq_s8, S, s8q, s8q, u8q, U) \
+  ENTRY_LANE (N##_lane_s16, S, s16q, s16, u8, U) \
+  ENTRY_LANE (N##_laneq_s16, S, s16q, s16, u8q, U) \
+  ENTRY_LANE (N##q_lane_s16, S, s16q, s16q, u8, U) \
+  ENTRY_LANE (N##q_laneq_s16, S, s16q, s16q, u8q, U)
+
+#undef ENTRY_VLANEP
+#define ENTRY_VLANEP(N, S, U) \
+  ENTRY_LANE (N##_lane_p8, S, p8q, p8, u8, U) \
+  ENTRY_LANE (N##_laneq_p8, S, p8q, p8, u8q, U) \
+  ENTRY_LANE (N##q_lane_p8, S, p8q, p8q, u8, U) \
+  ENTRY_LANE (N##q_laneq_p8, S, p8q, p8q, u8q, U) \
+  ENTRY_LANE (N##_lane_p16, S, p16q, p16, u8, U) \
+  ENTRY_LANE (N##_laneq_p16, S, p16q, p16, u8q, U) \
+  ENTRY_LANE (N##q_lane_p16, S, p16q, p16q, u8, U) \
+  ENTRY_LANE (N##q_laneq_p16, S, p16q, p16q, u8q, U)
+
+
+#undef ENTRY_VLANEF
+#define ENTRY_VLANEF(N, S, U) \
+  ENTRY_LANE (N##_lane_f16, S, f16q, f16, u8, U) \
+  ENTRY_LANE (N##_laneq_f16, S, f16q, f16, u8q, U) \
+  ENTRY_LANE (N##q_lane_f16, S, f16q, f16q, u8, U) \
+  ENTRY_LANE (N##q_laneq_f16, S, f16q, f16q, u8q, U)
+
+#undef ENTRY_VLANEBF
+#define ENTRY_VLANEBF(N, S, U) \
+  ENTRY_LANE (N##_lane_bf16, S, bf16q, bf16, u8, U) \
+  ENTRY_LANE (N##_laneq_bf16, S, bf16q, bf16, u8q, U) \
+  ENTRY_LANE (N##q_lane_bf16, S, bf16q, bf16q, u8, U) \
+  ENTRY_LANE (N##q_laneq_bf16, S, bf16q, bf16q, u8q, U)
+
 // faminmax
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
 ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX)
 ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN)
 #undef REQUIRED_EXTENSIONS
+
+// lut
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_LUT)
+ENTRY_VLANEIS (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+ENTRY_VLANEIU (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+ENTRY_VLANEP (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+ENTRY_VLANEF (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+ENTRY_VLANEBF (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e456f693d2f..73834966d21 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9965,3 +9965,13 @@
   "TARGET_FAMINMAX"
   "<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 )
+
+(define_insn "@aarch64_<vluti_uns_op><VLUT1:mode><VLUT2:mode>"
+  [(set (match_operand:<VLUT1:VCONQ> 0 "register_operand" "=w")
+	(unspec:<VLUT1:VCONQ> [(match_operand:VLUT1 1 "register_operand" "w")
+			       (match_operand:VLUT2 2 "register_operand" "w")
+			       (match_operand:SI 3 "const_int_operand")]
+	 VLUT_UNS))]
+  "TARGET_SIMD"
+  "luti2\t%0<VLUT1:Vmtype>, %1<VLUT1:Vmtype>, %2[%3]"
+)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 593319fd472..86471717a49 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -474,6 +474,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 #define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX)
 #define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX)
 
+/* Lookup table (LUTI) extension instructions are
+   enabled through +lut.  */
+#define TARGET_LUT AARCH64_HAVE_ISA (LUT)
+
 /* Prefer different predicate registers for the output of a predicated
    operation over re-using an existing input predicate.  */
 #define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0bc98315bb6..76cf44516ec 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1064,6 +1064,8 @@
     UNSPEC_FCVTXN	; Used in aarch64-simd.md.
     UNSPEC_FAMAX	; Used in aarch64-simd.md.
     UNSPEC_FAMIN	; Used in aarch64-simd.md.
+    UNSPEC_SIMD_LUTI2	; Used in aarch64-simd.md.
+    UNSPEC_SIMD_LUTI4	; Used in aarch64-simd.md.
 
     ;; All used in aarch64-sve2.md
     UNSPEC_FCVTN
@@ -1603,11 +1605,24 @@
 			   (V2SI "V4SI") (V4SI "V4SI")
 			   (DI   "V2DI") (V2DI "V2DI")
 			   (V4HF "V8HF") (V8HF "V8HF")
+			   (V4BF "V8BF") (V8BF "V8BF")
 			   (V2SF "V4SF") (V4SF "V4SF")
 			   (V2DF "V2DF") (SI   "V4SI")
 			   (HI   "V8HI") (QI   "V16QI")
 			   (SF   "V4SF") (DF   "V2DF")])
 
+;; 128-bit container modes the inner or scalar source mode, in lower-case.
+(define_mode_attr Vconq [(V8QI "v16qi") (V16QI "v16qi")
+			 (V4HI "v8hi") (V8HI "v8hi")
+			 (V2SI "v4si") (V4SI "v4si")
+			 (DI "v2di") (V2DI "v2di")
+			 (V4HF "v8hf") (V8HF "v8hf")
+			 (V4BF "v8bf") (V8BF "v8bf")
+			 (V2SF "v4sf") (V4SF "v4sf")
+			 (V2DF "v2df") (SI "v4si")
+			 (HI "v8hi") (QI "v16qi")
+			 (SF "v4sf") (DF "v2df")])
+
 ;; Half modes of all vector modes.
 (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI")
 			 (V4HI "V2HI") (V8HI  "V4HI")
@@ -4531,3 +4546,13 @@
 (define_code_attr faminmax_op
   [(smax "famax")
    (smin "famin")])
+
+;; Iterators and attributes for lut
+
+(define_mode_iterator VLUT1 [V16QI V8QI V8HI V4HI V8HF V4HF V8BF V4BF])
+
+(define_mode_iterator VLUT2 [V8QI V16QI])
+(define_int_iterator VLUT_UNS [UNSPEC_SIMD_LUTI2 UNSPEC_SIMD_LUTI4])
+
+(define_int_attr vluti_uns_op
+  [(UNSPEC_SIMD_LUTI2 "luti2") (UNSPEC_SIMD_LUTI4 "luti4")])
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vluti-builtins.c b/gcc/testsuite/gcc.target/aarch64/simd/vluti-builtins.c
new file mode 100644
index 00000000000..142657ba2ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vluti-builtins.c
@@ -0,0 +1,329 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+lut" } */
+/* { dg-final { check-function-bodies "**" ""} } */
+
+#include "arm_neon.h"
+
+/*
+** test_vluti2_lane_u8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+
+uint8x16_t
+test_vluti2_lane_u8(uint8x8_t a, uint8x8_t b)
+{
+  return vluti2_lane_u8(a, b, 0);
+}
+
+/*
+** test_vluti2q_lane_u8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+
+uint8x16_t
+test_vluti2q_lane_u8(uint8x16_t a, uint8x8_t b)
+{
+  return vluti2q_lane_u8(a, b, 0);
+}
+
+/*
+** test_vluti2_laneq_u8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+
+uint8x16_t
+test_vluti2_laneq_u8(uint8x8_t a, uint8x16_t b)
+{
+  return vluti2_laneq_u8(a, b, 0);
+}
+
+/*
+** test_vluti2q_laneq_u8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+
+uint8x16_t
+test_vluti2q_laneq_u8(uint8x16_t a, uint8x16_t b)
+{
+  return vluti2q_laneq_u8(a, b, 0);
+}
+
+/*
+** test_vluti2_lane_s8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+
+int8x16_t
+test_vluti2_lane_s8(int8x8_t a, uint8x8_t b)
+{
+  return vluti2_lane_s8(a, b, 0);
+}
+
+/*
+** test_vluti2q_lane_s8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+
+int8x16_t
+test_vluti2q_lane_s8(int8x16_t a, uint8x8_t b)
+{
+  return vluti2q_lane_s8(a, b, 0);
+}
+
+/*
+** test_vluti2_laneq_s8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+
+int8x16_t
+test_vluti2_laneq_s8(int8x8_t a, uint8x16_t b)
+{
+  return vluti2_laneq_s8(a, b, 0);
+}
+
+/*
+** test_vluti2q_laneq_s8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+
+int8x16_t
+test_vluti2q_laneq_s8(int8x16_t a, uint8x16_t b)
+{
+  return vluti2q_laneq_s8(a, b, 0);
+}
+
+/*
+** test_vluti2_lane_u16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+
+uint16x8_t
+test_vluti2_lane_u16(uint16x4_t a, uint8x8_t b)
+{
+  return vluti2_lane_u16(a, b, 0);
+}
+
+/*
+** test_vluti2q_lane_u16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+
+uint16x8_t
+test_vluti2q_lane_u16(uint16x8_t a, uint8x8_t b)
+{
+  return vluti2q_lane_u16(a, b, 0);
+}
+
+/*
+** test_vluti2_laneq_u16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+
+uint16x8_t
+test_vluti2_laneq_u16(uint16x4_t a, uint8x16_t b)
+{
+  return vluti2_laneq_u16(a, b, 0);
+}
+
+/*
+** test_vluti2q_laneq_u16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+
+uint16x8_t
+test_vluti2q_laneq_u16(uint16x8_t a, uint8x16_t b)
+{
+  return vluti2q_laneq_u16(a, b, 0);
+}
+
+/*
+** test_vluti2q_lane_s16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+
+int16x8_t
+test_vluti2q_lane_s16(int16x8_t a, uint8x8_t b)
+{
+  return vluti2q_lane_s16(a, b, 0);
+}
+
+/*
+** test_vluti2_laneq_s16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+
+int16x8_t
+test_vluti2_laneq_s16(int16x4_t a, uint8x16_t b)
+{
+  return vluti2_laneq_s16(a, b, 0);
+}
+
+/*
+** test_vluti2q_laneq_s16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+
+int16x8_t
+test_vluti2q_laneq_s16(int16x8_t a, uint8x16_t b)
+{
+  return vluti2q_laneq_s16(a, b, 0);
+}
+
+/*
+** test_vluti2_lane_p8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_p8(vn, vm, 0);
+}
+
+/*
+** test_vluti2_laneq_p8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_p8(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_lane_p8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_p8(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_laneq_p8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_p8(vn, vm, 0);
+}
+
+/*
+** test_vluti2_lane_f16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+float16x8_t test_vluti2_lane_f16(float16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_f16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_laneq_f16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+float16x8_t test_vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_f16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_lane_f16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+float16x8_t test_vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_f16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_laneq_f16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+float16x8_t test_vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_f16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_lane_bf16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+bfloat16x8_t test_vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_bf16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_laneq_bf16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+bfloat16x8_t test_vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_bf16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_lane_bf16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+bfloat16x8_t test_vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_bf16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_laneq_bf16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+bfloat16x8_t test_vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_bf16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_lane_p16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+poly16x8_t test_vluti2_lane_p16(poly16x4_t vn, uint8x8_t vm) {
+  return vluti2_lane_p16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_laneq_p16:
+** luti2 v0\.4h, v0.4h, v1\[0\]
+** ret
+*/
+poly16x8_t test_vluti2_laneq_p16(poly16x4_t vn, uint8x16_t vm) {
+  return vluti2_laneq_p16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_lane_p16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+poly16x8_t test_vluti2q_lane_p16(poly16x8_t vn, uint8x8_t vm) {
+  return vluti2q_lane_p16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_laneq_p16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+poly16x8_t test_vluti2q_laneq_p16(poly16x8_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_p16(vn, vm, 0);
+}