Hi Tamar, > On 2 Sep 2025, at 11:46, Tamar Christina <tamar.christ...@arm.com> wrote: > > This implements the new vector optabs vec_<su>addh_narrow<mode> > adding support for in-vectorizer use for early break.
The Advanced SIMD ADDHN instruction doesn’t perform a widening of the operands for the addition as far as I know. That is why there are no “SADDHN” or “UADDHN” instructions, which would correspond to a sign-extend or zero-extend of the operands. So I think there’s something off here…. > > Bootstrapped Regtested on aarch64-none-linux-gnu, > arm-none-linux-gnueabihf, x86_64-pc-linux-gnu > -m32, -m64 and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * config/aarch64/aarch64-simd.md (vec_<su>addh_narrow<mode>): New. > * config/aarch64/iterators.md (UNSPEC_SADDHN, UNSPEC_UADDHN): New. > (su, ADDHN): Use them. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/vect-addhn_1.c: New test. > > --- > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..a1737792093ffea5f72c10c1cb4df6322280dbf4 > 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -949,6 +949,22 @@ (define_expand "vec_widen_<su>abd_lo_<mode>" > } > ) > > +(define_expand "vec_<su>addh_narrow<mode>" > + [(set (match_operand:<VNARROWQ> 0 "register_operand") > + (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand") > + (match_operand:VQN 2 "register_operand"))] > + ADDHN))] > + "TARGET_SIMD" > + { > + rtx shft > + = aarch64_simd_gen_const_vector_dup (<MODE>mode, > + GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2); > + emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1], > + operands[2], shft)); > + DONE; … So this would produce the exact same RTL for the “vec_saddh_narrow” and “vec_uaddh_narrow” cases. > + } > +) > + > (define_insn "aarch64_<su>abal<mode>" > [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > (plus:<VWIDE> > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index > b15e57843fed78ffe7edd927cfd7acbf395414a4..c65509fe949ad3056d194f05f901e76382d62ad8 > 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -806,6 +806,8 @@ (define_c_enum "unspec" > UNSPEC_UHADD ; Used in aarch64-simd.md. > UNSPEC_SRHADD ; Used in aarch64-simd.md. > UNSPEC_URHADD ; Used in aarch64-simd.md. > + UNSPEC_SADDHN ; Used in aarch64-simd.md. > + UNSPEC_UADDHN ; Used in aarch64-simd.md. As per above, there are no such instructions in Advanced SIMD so it would be confusing to have UNSPEC_ values to represent them. Thanks, Kyrill > UNSPEC_SHSUB ; Used in aarch64-simd.md. > UNSPEC_UHSUB ; Used in aarch64-simd.md. > UNSPEC_SQDMULH ; Used in aarch64-simd.md. > @@ -3251,6 +3253,8 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD]) > > (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD]) > > +(define_int_iterator ADDHN [UNSPEC_SADDHN UNSPEC_UADDHN]) > + > (define_int_iterator BSL_DUP [1 2]) > > (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) > @@ -4250,7 +4254,8 @@ (define_int_attr su [(UNSPEC_SADDV "s") > (UNSPEC_COND_SCVTF "s") > (UNSPEC_COND_UCVTF "u") > (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u") > - (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")]) > + (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u") > + (UNSPEC_SADDHN "s") (UNSPEC_UADDHN "u")]) > > (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") > (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c > b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c > @@ -0,0 +1,88 @@ > +/* { dg-require-effective-target vect_int } */ > + > +#include <stdint.h> > +#include <stdio.h> > + > +#include "tree-vect.h" > + > +#define N 1000 > +#define CHECK_ERROR(cond, fmt, ...) \ > + do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } > while (0) > + > +// Generates all test components for a given type combo > +#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) > \ > + A_TYPE a_##A_TYPE##_##C_TYPE[N]; > \ > + A_TYPE b_##A_TYPE##_##C_TYPE[N]; > \ > + C_TYPE c_##A_TYPE##_##C_TYPE[N]; > \ > + C_TYPE ref_##A_TYPE##_##C_TYPE[N]; > \ > + > \ > + void init_##A_TYPE##_##C_TYPE() { > \ > + _Pragma ("GCC novector") \ > + for (int i = 0; i < N; i++) { > \ > + a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); > \ > + b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); > \ > + } > \ > + } > \ > + > \ > + void foo_##A_TYPE##_##C_TYPE() { > \ > + for (int i = 0; i < N; i++) > \ > + c_##A_TYPE##_##C_TYPE[i] = > \ > + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + > \ > + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; > \ > + } > \ > + > \ > + void ref_##A_TYPE##_##C_TYPE##_compute() { > \ > + _Pragma ("GCC novector") \ > + for (int i = 0; i < N; i++) > \ > + ref_##A_TYPE##_##C_TYPE[i] = > \ > + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + > \ > + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; > \ > + } > \ > + > \ > + void validate_##A_TYPE##_##C_TYPE(const char* variant_name) { > \ > + _Pragma ("GCC novector") \ > + for (int i = 0; i < N; i++) { > \ > + if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { > \ > + printf("FAIL [%s]: Index %d: got %lld, expected %lld\n", > \ > + variant_name, i, > \ > + (long long)c_##A_TYPE##_##C_TYPE[i], > \ > + (long long)ref_##A_TYPE##_##C_TYPE[i]); > \ > + __builtin_abort (); > \ > + } > \ > + } > \ > + } > + > +// Runs the test for one combo with name output > +#define RUN_COMBO(A_TYPE, C_TYPE) \ > + do { \ > + init_##A_TYPE##_##C_TYPE(); \ > + foo_##A_TYPE##_##C_TYPE(); \ > + ref_##A_TYPE##_##C_TYPE##_compute(); \ > + validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE); \ > + } while (0) > + > +// Instantiate all valid combinations > +TEST_COMBO(int16_t, int8_t, int32_t, 8) > +TEST_COMBO(uint16_t, uint8_t, uint32_t, 8) > +TEST_COMBO(int32_t, int16_t, int64_t, 16) > +TEST_COMBO(uint32_t, uint16_t, uint64_t, 16) > +#if defined(__aarch64__) > +TEST_COMBO(int64_t, int32_t, __int128_t, 32) > +TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32) > +#endif > + > +int main() { > + check_vect (); > + > + RUN_COMBO(int16_t, int8_t); > + RUN_COMBO(uint16_t, uint8_t); > + RUN_COMBO(int32_t, int16_t); > + RUN_COMBO(uint32_t, uint16_t); > +#if defined(__aarch64__) > + RUN_COMBO(int64_t, int32_t); > + RUN_COMBO(uint64_t, uint32_t); > +#endif > + > + return 0; > +} > + > > > -- > <rb19744.patch>