This implements the new vector optab vec_addh_narrow<mode> for AArch64, enabling its use inside the vectorizer for early-break vectorization.
Bootstrapped and regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu (-m32 and -m64) with no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-simd.md (vec_addh_narrow<mode>): New. * config/aarch64/iterators.md (UNSPEC_ADDHN): New. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vect-addhn_1.c: New test. --- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..af9154bec67c21d186463baaf7321c9559c1094b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -949,6 +949,22 @@ (define_expand "vec_widen_<su>abd_lo_<mode>" } ) +(define_expand "vec_addh_narrow<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand") + (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand"))] + UNSPEC_ADDHN))] + "TARGET_SIMD" + { + rtx shft + = aarch64_simd_gen_const_vector_dup (<MODE>mode, + GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2); + emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1], + operands[2], shft)); + DONE; + } +) + (define_insn "aarch64_<su>abal<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (plus:<VWIDE> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index b15e57843fed78ffe7edd927cfd7acbf395414a4..45c3968a5c937938b07992ab2869b5ae4db193a8 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -806,6 +806,7 @@ (define_c_enum "unspec" UNSPEC_UHADD ; Used in aarch64-simd.md. UNSPEC_SRHADD ; Used in aarch64-simd.md. UNSPEC_URHADD ; Used in aarch64-simd.md. + UNSPEC_ADDHN ; Used in aarch64-simd.md. UNSPEC_SHSUB ; Used in aarch64-simd.md. UNSPEC_UHSUB ; Used in aarch64-simd.md. UNSPEC_SQDMULH ; Used in aarch64-simd.md. 
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c new file mode 100644 index 0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c @@ -0,0 +1,88 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdint.h> +#include <stdio.h> + +#include "tree-vect.h" + +#define N 1000 +#define CHECK_ERROR(cond, fmt, ...) \ + do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0) + +// Generates all test components for a given type combo +#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) \ + A_TYPE a_##A_TYPE##_##C_TYPE[N]; \ + A_TYPE b_##A_TYPE##_##C_TYPE[N]; \ + C_TYPE c_##A_TYPE##_##C_TYPE[N]; \ + C_TYPE ref_##A_TYPE##_##C_TYPE[N]; \ + \ + void init_##A_TYPE##_##C_TYPE() { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) { \ + a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); \ + b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); \ + } \ + } \ + \ + void foo_##A_TYPE##_##C_TYPE() { \ + for (int i = 0; i < N; i++) \ + c_##A_TYPE##_##C_TYPE[i] = \ + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \ + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \ + } \ + \ + void ref_##A_TYPE##_##C_TYPE##_compute() { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) \ + ref_##A_TYPE##_##C_TYPE[i] = \ + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \ + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \ + } \ + \ + void validate_##A_TYPE##_##C_TYPE(const char* variant_name) { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) { \ + if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { \ + printf("FAIL [%s]: Index %d: got %lld, expected %lld\n", \ + variant_name, i, \ + (long long)c_##A_TYPE##_##C_TYPE[i], \ + (long long)ref_##A_TYPE##_##C_TYPE[i]); \ + __builtin_abort (); \ + } \ + } \ + } + +// Runs the test for one combo with name output +#define RUN_COMBO(A_TYPE, C_TYPE) \ + do { \ + 
init_##A_TYPE##_##C_TYPE(); \ + foo_##A_TYPE##_##C_TYPE(); \ + ref_##A_TYPE##_##C_TYPE##_compute(); \ + validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE); \ + } while (0) + +// Instantiate all valid combinations +TEST_COMBO(int16_t, int8_t, int32_t, 8) +TEST_COMBO(uint16_t, uint8_t, uint32_t, 8) +TEST_COMBO(int32_t, int16_t, int64_t, 16) +TEST_COMBO(uint32_t, uint16_t, uint64_t, 16) +#if defined(__aarch64__) +TEST_COMBO(int64_t, int32_t, __int128_t, 32) +TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32) +#endif + +int main() { + check_vect (); + + RUN_COMBO(int16_t, int8_t); + RUN_COMBO(uint16_t, uint8_t); + RUN_COMBO(int32_t, int16_t); + RUN_COMBO(uint32_t, uint16_t); +#if defined(__aarch64__) + RUN_COMBO(int64_t, int32_t); + RUN_COMBO(uint64_t, uint32_t); +#endif + + return 0; +} + --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..af9154bec67c21d186463baaf7321c9559c1094b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -949,6 +949,22 @@ (define_expand "vec_widen_<su>abd_lo_<mode>" } ) +(define_expand "vec_addh_narrow<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand") + (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand"))] + UNSPEC_ADDHN))] + "TARGET_SIMD" + { + rtx shft + = aarch64_simd_gen_const_vector_dup (<MODE>mode, + GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2); + emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1], + operands[2], shft)); + DONE; + } +) + (define_insn "aarch64_<su>abal<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (plus:<VWIDE> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index b15e57843fed78ffe7edd927cfd7acbf395414a4..45c3968a5c937938b07992ab2869b5ae4db193a8 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -806,6 +806,7 @@ (define_c_enum "unspec" UNSPEC_UHADD ; Used in aarch64-simd.md. UNSPEC_SRHADD ; Used in aarch64-simd.md. UNSPEC_URHADD ; Used in aarch64-simd.md. + UNSPEC_ADDHN ; Used in aarch64-simd.md. UNSPEC_SHSUB ; Used in aarch64-simd.md. UNSPEC_UHSUB ; Used in aarch64-simd.md. UNSPEC_SQDMULH ; Used in aarch64-simd.md. diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c new file mode 100644 index 0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c @@ -0,0 +1,88 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdint.h> +#include <stdio.h> + +#include "tree-vect.h" + +#define N 1000 +#define CHECK_ERROR(cond, fmt, ...) 
\ + do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0) + +// Generates all test components for a given type combo +#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) \ + A_TYPE a_##A_TYPE##_##C_TYPE[N]; \ + A_TYPE b_##A_TYPE##_##C_TYPE[N]; \ + C_TYPE c_##A_TYPE##_##C_TYPE[N]; \ + C_TYPE ref_##A_TYPE##_##C_TYPE[N]; \ + \ + void init_##A_TYPE##_##C_TYPE() { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) { \ + a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); \ + b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); \ + } \ + } \ + \ + void foo_##A_TYPE##_##C_TYPE() { \ + for (int i = 0; i < N; i++) \ + c_##A_TYPE##_##C_TYPE[i] = \ + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \ + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \ + } \ + \ + void ref_##A_TYPE##_##C_TYPE##_compute() { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) \ + ref_##A_TYPE##_##C_TYPE[i] = \ + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \ + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \ + } \ + \ + void validate_##A_TYPE##_##C_TYPE(const char* variant_name) { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) { \ + if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { \ + printf("FAIL [%s]: Index %d: got %lld, expected %lld\n", \ + variant_name, i, \ + (long long)c_##A_TYPE##_##C_TYPE[i], \ + (long long)ref_##A_TYPE##_##C_TYPE[i]); \ + __builtin_abort (); \ + } \ + } \ + } + +// Runs the test for one combo with name output +#define RUN_COMBO(A_TYPE, C_TYPE) \ + do { \ + init_##A_TYPE##_##C_TYPE(); \ + foo_##A_TYPE##_##C_TYPE(); \ + ref_##A_TYPE##_##C_TYPE##_compute(); \ + validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE); \ + } while (0) + +// Instantiate all valid combinations +TEST_COMBO(int16_t, int8_t, int32_t, 8) +TEST_COMBO(uint16_t, uint8_t, uint32_t, 8) +TEST_COMBO(int32_t, int16_t, int64_t, 16) +TEST_COMBO(uint32_t, uint16_t, uint64_t, 16) +#if defined(__aarch64__) +TEST_COMBO(int64_t, int32_t, __int128_t, 32) 
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32) +#endif + +int main() { + check_vect (); + + RUN_COMBO(int16_t, int8_t); + RUN_COMBO(uint16_t, uint8_t); + RUN_COMBO(int32_t, int16_t); + RUN_COMBO(uint32_t, uint16_t); +#if defined(__aarch64__) + RUN_COMBO(int64_t, int32_t); + RUN_COMBO(uint64_t, uint32_t); +#endif + + return 0; +} +