Following Richard's suggestion, we should not model the address cost in the
loop vectorizer for select_vl or decrement IV, since the other vectorization
styles don't do that either.

To make the cost model comparison apples to apples, this patch changes COST
from 2 to 1, which turns out to give better codegen in various cases for RVV.

OK for trunk?

        PR target/111153

gcc/ChangeLog:

        * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Remove
        address cost for select_vl/decrement IV.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: Moved to...
        * gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c: ...here.
        * gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c: New test.

---
 .../vect/costmodel/riscv/rvv/pr111153-1.c      | 18 ++++++++++++++++++
 .../riscv/rvv/{pr111153.c => pr11153-2.c}      |  4 ++--
 gcc/tree-vect-loop.cc                          | 10 ++++------
 3 files changed, 24 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
 rename gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/{pr111153.c => 
pr11153-2.c} (93%)

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
new file mode 100644
index 00000000000..51c91f7410c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-mtune=generic-ooo -ffast-math" } */
+
+#define DEF_REDUC_PLUS(TYPE)                                                   
\
+  TYPE __attribute__ ((noinline, noclone))                                     
\
+  reduc_plus_##TYPE (TYPE *__restrict a, int n)                                
\
+  {                                                                            
\
+    TYPE r = 0;                                                                
\
+    for (int i = 0; i < n; ++i)                                                
\
+      r += a[i];                                                               
\
+    return r;                                                                  
\
+  }
+
+#define TEST_PLUS(T) T (int) T (float)
+
+TEST_PLUS (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
similarity index 93%
rename from gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
rename to gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
index 06e08ec5f2e..d361f1fc7fa 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-mtune=generic-ooo" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -ffast-math" 
} */
 
 #define DEF_REDUC_PLUS(TYPE)                                                   
\
   TYPE __attribute__ ((noinline, noclone))                                     
\
@@ -11,7 +11,7 @@
     return r;                                                                  
\
   }
 
-#define TEST_PLUS(T) T (int)
+#define TEST_PLUS(T) T (int) T (float)
 
 TEST_PLUS (DEF_REDUC_PLUS)
 
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 19e38b8637b..7a3db5f098b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4872,12 +4872,10 @@ vect_estimate_min_profitable_iters (loop_vec_info 
loop_vinfo,
 
            unsigned int length_update_cost = 0;
            if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
-             /* For decrement IV style, we use a single SELECT_VL since
-                beginning to calculate the number of elements need to be
-                processed in current iteration, and a SHIFT operation to
-                compute the next memory address instead of adding vectorization
-                factor.  */
-             length_update_cost = 2;
+             /* For decrement IV style, each iteration only needs a single
+                SELECT_VL or MIN at the beginning to calculate the number of
+                elements to be processed in the current iteration.  */
+             length_update_cost = 1;
            else
              /* For increment IV stype, Each may need two MINs and one MINUS to
                 update lengths in body for next iteration.  */
-- 
2.36.1

Reply via email to