Following Richard's suggestion, we should not model the address cost in the loop vectorizer for SELECT_VL or the decrementing IV, since the other vectorization styles don't do that.
To make the cost model comparison apples to apples, this patch sets the length update cost from 2 to 1, which turns out to give better codegen in various cases for RVV. OK for trunk? PR target/111153 gcc/ChangeLog: * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Remove address cost for select_vl/decrement IV. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: Moved to... * gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c: ...here. * gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c: New test. --- .../vect/costmodel/riscv/rvv/pr111153-1.c | 18 ++++++++++++++++++ .../riscv/rvv/{pr111153.c => pr11153-2.c} | 4 ++-- gcc/tree-vect-loop.cc | 10 ++++------ 3 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c rename gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/{pr111153.c => pr11153-2.c} (93%) diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c new file mode 100644 index 00000000000..51c91f7410c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo -ffast-math" } */ + +#define DEF_REDUC_PLUS(TYPE) \ + TYPE __attribute__ ((noinline, noclone)) \ + reduc_plus_##TYPE (TYPE *__restrict a, int n) \ + { \ + TYPE r = 0; \ + for (int i = 0; i < n; ++i) \ + r += a[i]; \ + return r; \ + } + +#define TEST_PLUS(T) T (int) T (float) + +TEST_PLUS (DEF_REDUC_PLUS) + +/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c similarity index 93% rename from gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c rename to gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c index 06e08ec5f2e..d361f1fc7fa 100644 --- 
a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -ffast-math" } */ #define DEF_REDUC_PLUS(TYPE) \ TYPE __attribute__ ((noinline, noclone)) \ @@ -11,7 +11,7 @@ return r; \ } -#define TEST_PLUS(T) T (int) +#define TEST_PLUS(T) T (int) T (float) TEST_PLUS (DEF_REDUC_PLUS) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 19e38b8637b..7a3db5f098b 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4872,12 +4872,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, unsigned int length_update_cost = 0; if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)) - /* For decrement IV style, we use a single SELECT_VL since - beginning to calculate the number of elements need to be - processed in current iteration, and a SHIFT operation to - compute the next memory address instead of adding vectorization - factor. */ - length_update_cost = 2; + /* For decrement IV style, Each only need a single SELECT_VL + or MIN since beginning to calculate the number of elements + need to be processed in current iteration. */ + length_update_cost = 1; else /* For increment IV stype, Each may need two MINs and one MINUS to update lengths in body for next iteration. */ -- 2.36.1