Improve RISC-V vector code generation by preferring tail-agnostic (ta) and
mask-agnostic (ma) policies for vector instructions when merge operands
are undefined. This optimization, controlled by a uarch-specific
`prefer_agnostic` tuning parameter, reduces the number of `vsetvl`
instructions and avoids conservative undisturbed policy selections,
addressing PR target/118945.
Changes from v1:
- Following review comments, demand the tail/mask policy when agnostic is
preferred instead of changing the fusion rule.
PR target/118945
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_prefer_agnostic_p): New function.
(riscv_tune_param): Add prefer_agnostic member.
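(generic_tune_info, rocket_tune_info, sifive_7_tune_info,
sifive_p400_tune_info, sifive_p600_tune_info, thead_c906_tune_info,
xiangshan_nanhu_tune_info, generic_ooo_tune_info, tt_ascalon_d8_tune_info,
optimize_size_tune_info, mips_p8700_tune_info): Initialize prefer_agnostic.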
* config/riscv/riscv-protos.h (riscv_prefer_agnostic_p): Add
prototype.
* config/riscv/riscv-v.cc (get_prefer_tail_policy,
get_prefer_mask_policy): Use riscv_prefer_agnostic_p.
* config/riscv/riscv-vsetvl.cc (vsetvl_info::get_demand_flags):
Demand tail and mask policy when riscv_prefer_agnostic_p is true.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr118945-1.c: New file.
* gcc.target/riscv/rvv/autovec/pr118945-2.c: New file.
---
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-v.cc | 12 +++------
gcc/config/riscv/riscv-vsetvl.cc | 4 +--
gcc/config/riscv/riscv.cc | 22 +++++++++++++++-
.../gcc.target/riscv/rvv/autovec/pr118945-1.c | 13 ++++++++++
.../gcc.target/riscv/rvv/autovec/pr118945-2.c | 26 +++++++++++++++++++
6 files changed, 67 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e4473f45d..346d7a812 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -832,6 +832,7 @@ extern bool th_print_operand_address (FILE *, machine_mode,
rtx);
#endif
extern bool strided_load_broadcast_p (void);
+extern bool riscv_prefer_agnostic_p (void);
extern bool riscv_use_divmod_expander (void);
void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree,
int);
extern bool
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 8021bc14e..1d7d8a61b 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2140,10 +2140,8 @@ get_ma (rtx ma)
enum tail_policy
get_prefer_tail_policy ()
{
- /* TODO: By default, we choose to use TAIL_ANY which allows
- compiler pick up either agnostic or undisturbed. Maybe we
- will have a compile option like -mprefer=agnostic to set
- this value???. */
+ if (riscv_prefer_agnostic_p ())
+ return TAIL_AGNOSTIC;
return TAIL_ANY;
}
@@ -2151,10 +2149,8 @@ get_prefer_tail_policy ()
enum mask_policy
get_prefer_mask_policy ()
{
- /* TODO: By default, we choose to use MASK_ANY which allows
- compiler pick up either agnostic or undisturbed. Maybe we
- will have a compile option like -mprefer=agnostic to set
- this value???. */
+ if (riscv_prefer_agnostic_p ())
+ return MASK_AGNOSTIC;
return MASK_ANY;
}
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 4fe0ae6d9..fa4d21125 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1144,9 +1144,9 @@ public:
dflags |= demand_flags::DEMAND_LMUL_P;
}
- if (!m_ta)
+ if (!m_ta || riscv_prefer_agnostic_p ())
dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
- if (!m_ma)
+ if (!m_ma || riscv_prefer_agnostic_p ())
dflags |= demand_flags::DEMAND_MASK_POLICY_P;
}
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 41ee4014c..0b1ec59f8 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -317,6 +317,7 @@ struct riscv_tune_param
const char *function_align;
const char *jump_align;
const char *loop_align;
+ bool prefer_agnostic;
};
@@ -481,6 +482,7 @@ static const struct riscv_tune_param generic_tune_info = {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ false, /* prefer-agnostic. */
};
/* Costs to use when optimizing for rocket. */
@@ -505,6 +507,7 @@ static const struct riscv_tune_param rocket_tune_info = {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ false, /* prefer-agnostic. */
};
/* Costs to use when optimizing for Sifive 7 Series. */
@@ -529,6 +532,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ false, /* prefer-agnostic. */
};
/* Costs to use when optimizing for Sifive p400 Series. */
@@ -553,6 +557,7 @@ static const struct riscv_tune_param sifive_p400_tune_info
= {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ true, /* prefer-agnostic. */
};
/* Costs to use when optimizing for Sifive p600 Series. */
@@ -577,6 +582,7 @@ static const struct riscv_tune_param sifive_p600_tune_info
= {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ true, /* prefer-agnostic. */
};
/* Costs to use when optimizing for T-HEAD c906. */
@@ -601,6 +607,7 @@ static const struct riscv_tune_param thead_c906_tune_info =
{
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ false, /* prefer-agnostic. */
};
/* Costs to use when optimizing for xiangshan nanhu. */
@@ -625,6 +632,7 @@ static const struct riscv_tune_param
xiangshan_nanhu_tune_info = {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ true, /* prefer-agnostic. */
};
/* Costs to use when optimizing for a generic ooo profile. */
@@ -649,6 +657,7 @@ static const struct riscv_tune_param generic_ooo_tune_info
= {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ true, /* prefer-agnostic. */
};
/* Costs to use when optimizing for Tenstorrent Ascalon 8 wide. */
@@ -673,6 +682,7 @@ static const struct riscv_tune_param
tt_ascalon_d8_tune_info = {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ true, /* prefer-agnostic. */
};
/* Costs to use when optimizing for size. */
@@ -697,6 +707,7 @@ static const struct riscv_tune_param
optimize_size_tune_info = {
NULL, /* function_align */
NULL, /* jump_align */
NULL, /* loop_align */
+ false, /* prefer-agnostic. */
};
/* Costs to use when optimizing for MIPS P8700 */
@@ -720,7 +731,8 @@ static const struct riscv_tune_param mips_p8700_tune_info =
{
NULL, /* vector cost */
NULL, /* function_align */
NULL, /* jump_align */
- NULL, /* loop_align */
+ NULL, /* loop_align. */
+ true, /* prefer-agnostic. */
};
static bool riscv_avoid_shrink_wrapping_separate ();
@@ -12842,6 +12854,14 @@ strided_load_broadcast_p ()
return tune_param->use_zero_stride_load;
}
+/* Return TRUE if we should prefer agnostic vector code, FALSE otherwise. */
+
+bool
+riscv_prefer_agnostic_p ()
+{
+ return tune_param->prefer_agnostic;
+}
+
/* Return TRUE if we should use the divmod expander, FALSE otherwise. This
allows the behavior to be tuned for specific implementations as well as
when optimizing for size. */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
new file mode 100644
index 000000000..49705bf7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mtune=generic-ooo -O3 -march=rv64gcv_zvl256b_zba -mabi=lp64d -mrvv-max-lmul=m2 -mrvv-vector-bits=scalable" } */
+
+int test(int* in, int n)
+{
+ int accum = 0;
+ for (int i = 0; i < n; i++)
+ accum += in[i];
+
+ return accum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
new file mode 100644
index 000000000..9b9844446
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rva23u64 -mtune=generic-ooo -Ofast -S -fno-schedule-insns -fno-schedule-insns2" } */
+
+void vmult(
+ double* dst,
+ const double* src,
+ const unsigned int* rowstart,
+ const unsigned int* colnums,
+ const double* val,
+ const unsigned int n_rows
+) {
+ const double* val_ptr = &val[rowstart[0]];
+ const unsigned int* colnum_ptr = &colnums[rowstart[0]];
+ double* dst_ptr = dst;
+
+ for (unsigned int row = 0; row < n_rows; ++row) {
+ double s = 0.;
+ const double* const val_end_of_row = &val[rowstart[row + 1]];
+ while (val_ptr != val_end_of_row) {
+ s += *val_ptr++ * src[*colnum_ptr++];
+ }
+ *dst_ptr++ = s;
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 5 } } */
--
2.43.0