This patch adds support for auto-vectorization of clz for MVE. It does so by removing the unspec from mve_vclzq_<supf><mode> and using 'clz' instead. It moves the neon_vclz<mode> expander from neon.md to vec-common.md and renames it to the standard name clz<mode>2.
2021-06-03 Christophe Lyon <christophe.l...@linaro.org> gcc/ * config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S. (VCLZQ): Remove. * config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix, remove <supf> iterator. (mve_vclzq_u<mode>): New. * config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>. (neon_vclz<mode): Move to ... * config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove. * config/arm/vec-common.md ... here. Add support for MVE. gcc/testsuite/ * gcc.target/arm/simd/mve-vclz.c: New test. --- gcc/config/arm/iterators.md | 3 +-- gcc/config/arm/mve.md | 12 ++++++--- gcc/config/arm/neon.md | 11 +------- gcc/config/arm/unspecs.md | 2 -- gcc/config/arm/vec-common.md | 13 +++++++++ gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 ++++++++++++++++++++ 6 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 3042bafc6c6..5c4fe895268 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1288,7 +1288,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") + (VREV32Q_U "u") (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") @@ -1538,7 +1538,6 @@ (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U]) (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) diff --git a/gcc/config/arm/mve.md 
b/gcc/config/arm/mve.md index 04aa612331a..99e46d0bc69 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -435,16 +435,22 @@ (define_insn "mve_vdupq_n_<supf><mode>" ;; ;; [vclzq_u, vclzq_s]) ;; -(define_insn "mve_vclzq_<supf><mode>" +(define_insn "@mve_vclzq_s<mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] - VCLZQ)) + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" "vclz.i%#<V_sz_elem> %q0, %q1" [(set_attr "type" "mve_move") ]) +(define_expand "mve_vclzq_u<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand") + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand"))) + ] + "TARGET_HAVE_MVE" +) ;; ;; [vclsq_s]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 18571d819eb..0fdffaf4ec4 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3018,7 +3018,7 @@ (define_insn "neon_vcls<mode>" [(set_attr "type" "neon_cls<q>")] ) -(define_insn "clz<mode>2" +(define_insn "neon_vclz<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] "TARGET_NEON" @@ -3026,15 +3026,6 @@ (define_insn "clz<mode>2" [(set_attr "type" "neon_cnt<q>")] ) -(define_expand "neon_vclz<mode>" - [(match_operand:VDQIW 0 "s_register_operand") - (match_operand:VDQIW 1 "s_register_operand")] - "TARGET_NEON" -{ - emit_insn (gen_clz<mode>2 (operands[0], operands[1])); - DONE; -}) - (define_insn "popcount<mode>2" [(set (match_operand:VE 0 "s_register_operand" "=w") (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index ed1bc293b78..ad1c6edd005 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -556,8 +556,6 @@ (define_c_enum "unspec" [ VQABSQ_S VDUPQ_N_U VDUPQ_N_S - VCLZQ_U - VCLZQ_S VCLSQ_S VADDVQ_S VADDVQ_U diff --git a/gcc/config/arm/vec-common.md 
b/gcc/config/arm/vec-common.md index 2779c1a8aaa..1ba1e5eb008 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -625,3 +625,16 @@ (define_expand "uavg<mode>3_ceil" operands[0], operands[1], operands[2])); DONE; }) + +(define_expand "clz<mode>2" + [(match_operand:VDQIW 0 "s_register_operand") + (match_operand:VDQIW 1 "s_register_operand")] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT" +{ + if (TARGET_NEON) + emit_insn (gen_neon_vclz<mode> (operands[0], operands[1])); + else + emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1])); + DONE; +}) diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c new file mode 100644 index 00000000000..7068736bc28 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +#define FUNC(SIGN, TYPE, BITS, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]); \ + } \ +} + +FUNC(s, int, 32, clz) +FUNC(u, uint, 32, clz) +FUNC(s, int, 16, clz) +FUNC(u, uint, 16, clz) +FUNC(s, int, 8, clz) +FUNC(u, uint, 8, clz) + +/* 16 and 8-bit versions are not vectorized because they need pack/unpack + patterns since __builtin_clz uses 32-bit parameter and return value. */ +/* { dg-final { scan-assembler-times {vclz\.i32 q[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vclz\.i16 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {vclz\.i8 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ -- 2.25.1