This patch fixes 200+ ICEs exposed by testing with rv64gc_zve64d; see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112694
The root cause is that we disallow poly (1,1) size vectorization in preferred_simd_mode with the following code: - if (TARGET_MIN_VLEN < 128 && TARGET_MAX_LMUL < RVV_M2) - return word_mode; However, we allow poly (1,1) size in the hooks: TARGET_VECTORIZE_RELATED_MODE TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES and also enable it in all vectorization patterns. I originally added this check to preferred_simd_mode because a poly (1,1) size mode causes an ICE in can_duplicate_and_interleave_p. So, the alternative approach would be to block poly (1,1) size in both the TARGET_VECTORIZE_RELATED_MODE and TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES hooks and in all vectorization patterns, which is an ugly approach that requires too many code changes. Now, after investigation, I find that the loop vectorizer can automatically block poly (1,1) size vectors in interleave vectorization since this commit: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=730909fa858bd691095bc23655077aa13b7941a9 So, we don't need to worry about ICEs in interleave vectorization and can allow poly (1,1) size vectors in vectorization, which fixes 200+ ICEs with the zve64d march. PR target/112694 gcc/ChangeLog: * config/riscv/riscv-v.cc (preferred_simd_mode): Allow poly_int (1,1) vectors. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr112694-1.c: New test. --- gcc/config/riscv/riscv-v.cc | 19 +++------ .../gcc.target/riscv/rvv/autovec/pr112694-1.c | 41 +++++++++++++++++++ 2 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112694-1.c diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 4bd1131ba87..52826d0e769 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2088,32 +2088,23 @@ expand_tuple_move (rtx *ops) machine_mode preferred_simd_mode (scalar_mode mode) { - /* We will disable auto-vectorization when TARGET_MIN_VLEN < 128 && - riscv_autovec_lmul < RVV_M2. 
Since GCC loop vectorizer report ICE when we - enable -march=rv64gc_zve32* and -march=rv32gc_zve64*. in the - 'can_duplicate_and_interleave_p' of tree-vect-slp.cc. Since both - RVVM1SImode in -march=*zve32*_zvl32b and RVVM1DImode in - -march=*zve64*_zvl64b are NUNITS = poly (1, 1), they will cause ICE in loop - vectorizer when we enable them in this target hook. Currently, we can - support auto-vectorization in -march=rv32_zve32x_zvl128b. Wheras, - -march=rv32_zve32x_zvl32b or -march=rv32_zve32x_zvl64b are disabled. */ if (autovec_use_vlmax_p ()) { - if (TARGET_MIN_VLEN < 128 && TARGET_MAX_LMUL < RVV_M2) - return word_mode; /* We use LMUL = 1 as base bytesize which is BYTES_PER_RISCV_VECTOR and riscv_autovec_lmul as multiply factor to calculate the the NUNITS to get the auto-vectorization mode. */ poly_uint64 nunits; poly_uint64 vector_size = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL; poly_uint64 scalar_size = GET_MODE_SIZE (mode); - gcc_assert (multiple_p (vector_size, scalar_size, &nunits)); + /* Disable vectorization when we can't find a RVV mode for it. + E.g. -march=rv64gc_zve32x doesn't have a vector mode to vectorize + a double (DFmode) type. */ + if (!multiple_p (vector_size, scalar_size, &nunits)) + return word_mode; machine_mode rvv_mode; if (get_vector_mode (mode, nunits).exists (&rvv_mode)) return rvv_mode; } - /* TODO: We will support minimum length VLS auto-vectorization in - the future. 
*/ return word_mode; } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112694-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112694-1.c new file mode 100644 index 00000000000..8c7f7a980e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112694-1.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zve64d_zvfh_zfh -mabi=ilp32d -mcmodel=medany -fdiagnostics-plain-output -ftree-vectorize -O2 --param riscv-autovec-lmul=m1 -std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE) \ + __attribute__((noipa)) \ + void vmul_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = a[i] * b[i]; \ + } + +#define TEST_ALL() \ + TEST_TYPE(_Float16) \ + +TEST_ALL() + +#include <assert.h> + +#define SZ 512 + +#define RUN(TYPE, VAL) \ + TYPE a##TYPE[SZ]; \ + TYPE b##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE[i] = 2; \ + b##TYPE[i] = VAL; \ + } \ + vmul_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (a##TYPE[i] == 2 * VAL); + +#define RUN_ALL() \ + RUN(_Float16, 4) \ + +int main () +{ + RUN_ALL() +} -- 2.36.3