Improve RISC-V vector code generation by preferring tail-agnostic (ta) and
mask-agnostic (ma) policies for vector instructions when merge operands
are undefined. This optimization, controlled by a uarch-specific
`prefer_agnostic` tuning parameter, reduces the number of `vsetvl`
instructions and avoids conservative undisturbed policy selections,
addressing PR target/118945.

Changes from v1:
        - Per review comments, demand the tail/mask policy when agnostic is
        preferred instead of changing the fusion rule.

        PR target/118945
gcc/ChangeLog:

        * config/riscv/riscv.cc (riscv_prefer_agnostic_p): New function.
        (riscv_tune_param): Add prefer_agnostic member.
        (various tune info structures): Initialize prefer_agnostic.
        * config/riscv/riscv-protos.h (riscv_prefer_agnostic_p): Add
        prototype.
        * config/riscv/riscv-v.cc (get_prefer_tail_policy,
        get_prefer_mask_policy): Use riscv_prefer_agnostic_p.
        * config/riscv/riscv-vsetvl.cc (vsetvl_info::get_demand_flags):
        Demand policy for agnostic when prefer_agnostic is true.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/pr118945-1.c: New file.
        * gcc.target/riscv/rvv/autovec/pr118945-2.c: New file.
---
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv-v.cc                   | 12 +++------
 gcc/config/riscv/riscv-vsetvl.cc              |  4 +--
 gcc/config/riscv/riscv.cc                     | 22 +++++++++++++++-
 .../gcc.target/riscv/rvv/autovec/pr118945-1.c | 13 ++++++++++
 .../gcc.target/riscv/rvv/autovec/pr118945-2.c | 26 +++++++++++++++++++
 6 files changed, 67 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e4473f45d..346d7a812 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -832,6 +832,7 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
 #endif
 
 extern bool strided_load_broadcast_p (void);
+extern bool riscv_prefer_agnostic_p (void);
 extern bool riscv_use_divmod_expander (void);
 void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int);
 extern bool
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 8021bc14e..1d7d8a61b 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2140,10 +2140,8 @@ get_ma (rtx ma)
 enum tail_policy
 get_prefer_tail_policy ()
 {
-  /* TODO: By default, we choose to use TAIL_ANY which allows
-     compiler pick up either agnostic or undisturbed. Maybe we
-     will have a compile option like -mprefer=agnostic to set
-     this value???.  */
+  if (riscv_prefer_agnostic_p ())
+    return TAIL_AGNOSTIC;
   return TAIL_ANY;
 }
 
@@ -2151,10 +2149,8 @@ get_prefer_tail_policy ()
 enum mask_policy
 get_prefer_mask_policy ()
 {
-  /* TODO: By default, we choose to use MASK_ANY which allows
-     compiler pick up either agnostic or undisturbed. Maybe we
-     will have a compile option like -mprefer=agnostic to set
-     this value???.  */
+  if (riscv_prefer_agnostic_p ())
+    return MASK_AGNOSTIC;
   return MASK_ANY;
 }
 
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 4fe0ae6d9..fa4d21125 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1144,9 +1144,9 @@ public:
              dflags |= demand_flags::DEMAND_LMUL_P;
          }
 
-       if (!m_ta)
+       if (!m_ta || riscv_prefer_agnostic_p ())
          dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
-       if (!m_ma)
+       if (!m_ma || riscv_prefer_agnostic_p ())
          dflags |= demand_flags::DEMAND_MASK_POLICY_P;
       }
 
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 41ee4014c..0b1ec59f8 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -317,6 +317,7 @@ struct riscv_tune_param
   const char *function_align;
   const char *jump_align;
   const char *loop_align;
+  bool prefer_agnostic;
 };
 
 
@@ -481,6 +482,7 @@ static const struct riscv_tune_param generic_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for rocket.  */
@@ -505,6 +507,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -529,6 +532,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive p400 Series.  */
@@ -553,6 +557,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive p600 Series.  */
@@ -577,6 +582,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -601,6 +607,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for xiangshan nanhu.  */
@@ -625,6 +632,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for a generic ooo profile.  */
@@ -649,6 +657,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Tenstorrent Ascalon 8 wide.  */
@@ -673,6 +682,7 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -697,6 +707,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for MIPS P8700 */
@@ -720,7 +731,8 @@ static const struct riscv_tune_param mips_p8700_tune_info = {
   NULL,         /* vector cost */
   NULL,         /* function_align */
   NULL,         /* jump_align */
-  NULL,         /* loop_align */
+  NULL,                /* loop_align.  */
+  true,                /* prefer-agnostic.  */
 };
 
 static bool riscv_avoid_shrink_wrapping_separate ();
@@ -12842,6 +12854,14 @@ strided_load_broadcast_p ()
   return tune_param->use_zero_stride_load;
 }
 
+/* Return TRUE if we should prefer agnostic vector code, FALSE otherwise.  */
+
+bool
+riscv_prefer_agnostic_p ()
+{
+  return tune_param->prefer_agnostic;
+}
+
 /* Return TRUE if we should use the divmod expander, FALSE otherwise.  This
    allows the behavior to be tuned for specific implementations as well as
    when optimizing for size.  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
new file mode 100644
index 000000000..49705bf7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mtune=generic-ooo -O3 -march=rv64gcv_zvl256b_zba -mabi=lp64d -mrvv-max-lmul=m2 -mrvv-vector-bits=scalable" } */
+
+int test(int* in, int n)
+{
+  int accum = 0;
+  for (int i = 0; i < n; i++)
+        accum += in[i];
+
+  return accum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
new file mode 100644
index 000000000..9b9844446
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rva23u64 -mtune=generic-ooo -Ofast -S -fno-schedule-insns -fno-schedule-insns2" } */
+
+void vmult(
+    double* dst,
+    const double* src,
+    const unsigned int* rowstart,
+    const unsigned int* colnums,
+    const double* val,
+    const unsigned int n_rows
+) {
+    const double* val_ptr = &val[rowstart[0]];
+    const unsigned int* colnum_ptr = &colnums[rowstart[0]];
+    double* dst_ptr = dst;
+
+    for (unsigned int row = 0; row < n_rows; ++row) {
+        double s = 0.;
+        const double* const val_end_of_row = &val[rowstart[row + 1]];
+        while (val_ptr != val_end_of_row) {
+            s += *val_ptr++ * src[*colnum_ptr++];
+        }
+        *dst_ptr++ = s;
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 5 } } */
-- 
2.43.0

Reply via email to