LGTM







 ----------Reply to Message----------
 On Fri, Oct 3, 2025 21:04 PM Zhongyao Chen <[email protected]> wrote:

Improve RISC-V vector code generation by preferring tail-agnostic (ta) and
mask-agnostic (ma) policies for vector instructions when merge operands
are undefined. This optimization, controlled by a uarch-specific
`prefer_agnostic` tuning parameter, reduces `vsetvl` instructions and avoids
conservative undisturbed policy selections, addressing PR target/118945.

Changes from v2:
        - more detailed comment.
        - refine the test to check for vsetvli ta/tu number explicitly.

PR target/118945
gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_prefer_agnostic_p): New function.
(riscv_tune_param): Add prefer_agnostic member.
(various tune info structures): Initialize prefer_agnostic.
* config/riscv/riscv-protos.h (riscv_prefer_agnostic_p): Add
prototype.
* config/riscv/riscv-v.cc (get_prefer_tail_policy,
get_prefer_mask_policy): Use riscv_prefer_agnostic_p.
* config/riscv/riscv-vsetvl.cc (vsetvl_info::get_demand_flags):
Demand policy for agnostic when prefer_agnostic is true.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr118945-1.c: New file.
* gcc.target/riscv/rvv/autovec/pr118945-2.c: New file.

Signed-off-by: Zhongyao Chen <[email protected]>
---
 gcc/config/riscv/riscv-protos.h               |  1 +
 gcc/config/riscv/riscv-v.cc                   | 12 +++-----
 gcc/config/riscv/riscv-vsetvl.cc              |  5 ++--
 gcc/config/riscv/riscv.cc                     | 23 ++++++++++++++-
 .../gcc.target/riscv/rvv/autovec/pr118945-1.c | 15 ++++++++++
 .../gcc.target/riscv/rvv/autovec/pr118945-2.c | 28 +++++++++++++++++++
 6 files changed, 73 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e4473f45d..346d7a812 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -832,6 +832,7 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
 #endif
 
 extern bool strided_load_broadcast_p (void);
+extern bool riscv_prefer_agnostic_p (void);
 extern bool riscv_use_divmod_expander (void);
 void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int);
 extern bool
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 8021bc14e..1d7d8a61b 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2140,10 +2140,8 @@ get_ma (rtx ma)
 enum tail_policy
 get_prefer_tail_policy ()
 {
-  /* TODO: By default, we choose to use TAIL_ANY which allows
-     compiler pick up either agnostic or undisturbed. Maybe we
-     will have a compile option like -mprefer=agnostic to set
-     this value???.  */
+  if (riscv_prefer_agnostic_p ())
+    return TAIL_AGNOSTIC;
   return TAIL_ANY;
 }
 
@@ -2151,10 +2149,8 @@ get_prefer_tail_policy ()
 enum mask_policy
 get_prefer_mask_policy ()
 {
-  /* TODO: By default, we choose to use MASK_ANY which allows
-     compiler pick up either agnostic or undisturbed. Maybe we
-     will have a compile option like -mprefer=agnostic to set
-     this value???.  */
+  if (riscv_prefer_agnostic_p ())
+    return MASK_AGNOSTIC;
   return MASK_ANY;
 }
&nbsp;
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 4fe0ae6d9..3586d0cdc 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1144,9 +1144,10 @@ public:
       dflags |= demand_flags::DEMAND_LMUL_P;
   }
 
-if (!m_ta)
+/* Demand policy for agnostic if the uarch has a preference.  */
+if (!m_ta || riscv_prefer_agnostic_p ())
   dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
-if (!m_ma)
+if (!m_ma || riscv_prefer_agnostic_p ())
   dflags |= demand_flags::DEMAND_MASK_POLICY_P;
       }
&nbsp;
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 41ee4014c..bf3bcad4d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -317,6 +317,7 @@ struct riscv_tune_param
   const char *function_align;
   const char *jump_align;
   const char *loop_align;
+  bool prefer_agnostic;
 };
 
 
@@ -481,6 +482,7 @@ static const struct riscv_tune_param generic_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  false,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for rocket.  */
@@ -505,6 +507,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  false,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -529,6 +532,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  false,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive p400 Series.  */
@@ -553,6 +557,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  true,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive p600 Series.  */
@@ -577,6 +582,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  true,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -601,6 +607,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  false,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for xiangshan nanhu.  */
@@ -625,6 +632,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  true,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for a generic ooo profile.  */
@@ -649,6 +657,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  true,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Tenstorrent Ascalon 8 wide.  */
@@ -673,6 +682,7 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  true,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -697,6 +707,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   NULL,/* function_align */
   NULL,/* jump_align */
   NULL,/* loop_align */
+  false,/* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for MIPS P8700 */
@@ -720,7 +731,8 @@ static const struct riscv_tune_param mips_p8700_tune_info = {
   NULL,         /* vector cost */
   NULL,         /* function_align */
   NULL,         /* jump_align */
-  NULL,         /* loop_align */
+  NULL,/* loop_align.  */
+  true,/* prefer-agnostic.  */
 };
 
 static bool riscv_avoid_shrink_wrapping_separate ();
@@ -12842,6 +12854,15 @@ strided_load_broadcast_p ()
   return tune_param->use_zero_stride_load;
 }
 
+/* Return TRUE if we should use the tail agnostic and mask agnostic policies for
+   vector code, false otherwise.  */
+
+bool
+riscv_prefer_agnostic_p ()
+{
+  return tune_param->prefer_agnostic;
+}
+
 /* Return TRUE if we should use the divmod expander, FALSE otherwise.  This
    allows the behavior to be tuned for specific implementations as well as
    when optimizing for size.  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
new file mode 100644
index 000000000..fc37bef12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mtune=generic-ooo -O3 -march=rv64gcv_zvl256b_zba -mabi=lp64d -mrvv-max-lmul=m2 -mrvv-vector-bits=scalable" } */
+
+int test(int* in, int n)
+{
+  int accum = 0;
+  for (int i = 0; i < n; i++)
+        accum += in[i];
+
+  return accum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*ta,\s*ma} 3 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*tu,\s*ma} 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
new file mode 100644
index 000000000..956574067
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rva23u64 -mtune=generic-ooo -Ofast -S" } */
+
+void vmult(
+    double* dst,
+    const double* src,
+    const unsigned int* rowstart,
+    const unsigned int* colnums,
+    const double* val,
+    const unsigned int n_rows
+) {
+    const double* val_ptr = &val[rowstart[0]];
+    const unsigned int* colnum_ptr = &colnums[rowstart[0]];
+    double* dst_ptr = dst;
+
+    for (unsigned int row = 0; row < n_rows; ++row) {
+        double s = 0.;
+        const double* const val_end_of_row = &val[rowstart[row + 1]];
+        while (val_ptr != val_end_of_row) {
+            s += *val_ptr++ * src[*colnum_ptr++];
+        }
+        *dst_ptr++ = s;
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*ta,\s*ma} 4 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*tu,\s*ma} 1 } } */
+
-- 
2.27.0

Reply via email to