Hi,
As discussed in the patchwork sync, this patch adds a dynamic LMUL mode
that sets the LMUL to the ratio of largest/smallest type size in a loop,
with the maximum being LMUL8.
This is supposed to imitate what other architectures implicitly do via
vec_unpack_hi/lo.  I have only done cursory testing so far; more
coverage would obviously be preferred.
Regtested on rv64gcv_zvl512b.
Regards
Robin
PR target/122846
gcc/ChangeLog:
* config/riscv/riscv-opts.h (enum rvv_max_lmul_enum): Add
RVV_CONV_DYNAMIC.
(TARGET_MAX_LMUL): Handle RVV_CONV_DYNAMIC.
* config/riscv/riscv-string.cc (use_vector_stringop_p): Use
LMUL1 for RVV_CONV_DYNAMIC.
(expand_rawmemchr): Ditto.
(expand_strcmp): Ditto.
(check_vectorise_memory_operation): Ditto.
* config/riscv/riscv-vector-costs.cc (get_smallest_mode):
New function.
(compute_lmul_from_conversion_ratio): Calculate LMUL from
largest/smallest type.
(costs::has_unexpected_spills_p): Split.
(costs::compute_live_ranges_and_lmul): Compute smallest type and
call new function.
(costs::cleanup_live_range_data): New function.
(costs::compute_conversion_dynamic_lmul): New function.
(costs::record_potential_unexpected_spills): Use new function.
(costs::better_main_loop_than_p): Allow appropriate LMUL.
* config/riscv/riscv-vector-costs.h: Declare.
* config/riscv/riscv.opt: New option
-mrvv-max-lmul=conv-dynamic.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c: New test.
* gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c: New test.
* gcc.target/riscv/rvv/autovec/pr122846.c: New test.
---
gcc/config/riscv/riscv-opts.h | 7 +-
gcc/config/riscv/riscv-string.cc | 26 +-
gcc/config/riscv/riscv-vector-costs.cc | 226 ++++++++++++++----
gcc/config/riscv/riscv-vector-costs.h | 17 +-
gcc/config/riscv/riscv.opt | 3 +
.../riscv/rvv/autovec/dyn-lmul-conv-1.c | 42 ++++
.../riscv/rvv/autovec/dyn-lmul-conv-2.c | 43 ++++
.../gcc.target/riscv/rvv/autovec/pr122846.c | 14 ++
8 files changed, 320 insertions(+), 58 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 9b92a965e27..c6a09d59620 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -86,7 +86,9 @@ enum rvv_max_lmul_enum {
RVV_M4 = 4,
RVV_M8 = 8,
/* For dynamic LMUL, we compare COST start with LMUL8. */
- RVV_DYNAMIC = 9
+ RVV_DYNAMIC = 9,
+ /* For dynamic LMUL based on conversions, set LMUL based on type size ratio. */
+ RVV_CONV_DYNAMIC = 10
};
enum riscv_multilib_select_kind {
@@ -155,7 +157,8 @@ enum rvv_vector_bits_enum {
/* The maximum LMUL according to user configuration. */
#define TARGET_MAX_LMUL
\
- (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)
+ (int) ((rvv_max_lmul == RVV_DYNAMIC || rvv_max_lmul == RVV_CONV_DYNAMIC) \
+ ? RVV_M8 : rvv_max_lmul)
/* TLS types. */
enum riscv_tls_type {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index c5710e4c896..ac9b19213a0 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1089,13 +1089,17 @@ use_vector_stringop_p (struct stringop_info &info,
HOST_WIDE_INT max_ew,
if (!TARGET_VECTOR || !(stringop_strategy & STRATEGY_VECTOR))
return false;
+ int max_lmul = TARGET_MAX_LMUL;
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ max_lmul = RVV_M1;
+
if (CONST_INT_P (length_in))
{
HOST_WIDE_INT length = INTVAL (length_in);
/* If the VLEN and preferred LMUL allow the entire block to be copied in
one go then no loop is needed. */
- if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL))
+ if (known_le (length, BYTES_PER_RISCV_VECTOR * max_lmul))
{
need_loop = false;
@@ -1130,10 +1134,10 @@ use_vector_stringop_p (struct stringop_info &info,
HOST_WIDE_INT max_ew,
poly_int64 nunits;
if (need_loop)
- per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL;
+ per_iter = BYTES_PER_RISCV_VECTOR * max_lmul;
else
per_iter = length;
- /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by
+ /* BYTES_PER_RISCV_VECTOR * MAX_LMUL may not be divisible by
this potential_ew. */
if (!multiple_p (per_iter, potential_ew, &nunits))
continue;
@@ -1164,7 +1168,7 @@ use_vector_stringop_p (struct stringop_info &info,
HOST_WIDE_INT max_ew,
pointless.
Still, by choosing a lower LMUL factor that still allows
an entire transfer, we can reduce register pressure. */
- for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1)
+ for (int lmul = 1; lmul < max_lmul; lmul <<= 1)
if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul)
&& multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew,
&mode_units)
@@ -1177,9 +1181,9 @@ use_vector_stringop_p (struct stringop_info &info,
HOST_WIDE_INT max_ew,
if (vmode != VOIDmode)
break;
- /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be divisible
+ /* BYTES_PER_RISCV_VECTOR * MAX_LMUL will at least be divisible
by potential_ew 1, so this should succeed eventually. */
- if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
+ if (multiple_p (BYTES_PER_RISCV_VECTOR * max_lmul,
potential_ew, &mode_units)
&& riscv_vector::get_vector_mode (elem_mode,
mode_units).exists (&vmode))
@@ -1195,7 +1199,7 @@ use_vector_stringop_p (struct stringop_info &info,
HOST_WIDE_INT max_ew,
}
else
{
- gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode));
+ gcc_assert (get_lmul_mode (QImode, max_lmul).exists (&vmode));
}
/* A memcpy libcall in the worst case takes 3 instructions to prepare the
@@ -1356,6 +1360,8 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx
haystack, rtx needle,
unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
int lmul = TARGET_MAX_LMUL;
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ lmul = RVV_M1;
poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
machine_mode vmode;
@@ -1455,6 +1461,8 @@ expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
machine_mode mode = E_QImode;
unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
int lmul = TARGET_MAX_LMUL;
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ lmul = RVV_M1;
poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
machine_mode vmode;
@@ -1606,7 +1614,9 @@ check_vectorise_memory_operation (rtx length_in,
HOST_WIDE_INT &lmul_out)
if (rvv_max_lmul != RVV_DYNAMIC)
{
lmul_out = TARGET_MAX_LMUL;
- return (length <= ((TARGET_MAX_LMUL * TARGET_MIN_VLEN) / 8));
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ lmul_out = RVV_M1;
+ return (length <= ((lmul_out * TARGET_MIN_VLEN) / 8));
}
/* Find smallest lmul large enough for entire op. */
diff --git a/gcc/config/riscv/riscv-vector-costs.cc
b/gcc/config/riscv/riscv-vector-costs.cc
index 27ced61e815..41b4e4860b0 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -258,6 +258,14 @@ get_biggest_mode (machine_mode mode1, machine_mode mode2)
return mode1_size >= mode2_size ? mode1 : mode2;
}
+static machine_mode
+get_smallest_mode (machine_mode mode1, machine_mode mode2)
+{
+ unsigned int mode1_size = GET_MODE_BITSIZE (mode1).to_constant ();
+ unsigned int mode2_size = GET_MODE_BITSIZE (mode2).to_constant ();
+ return mode1_size <= mode2_size ? mode1 : mode2;
+}
+
/* Return true if OP is invariant. */
static bool
@@ -361,9 +369,11 @@ machine_mode
costs::compute_local_live_ranges (
loop_vec_info loop_vinfo,
const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
- hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
+ hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
+ machine_mode *smallest_mode_out)
{
machine_mode biggest_mode = QImode;
+ machine_mode smallest_mode = TImode;
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
if (!program_points_per_bb.is_empty ())
{
@@ -396,6 +406,8 @@ costs::compute_local_live_ranges (
{
biggest_mode = get_biggest_mode (biggest_mode,
TYPE_MODE (TREE_TYPE (lhs)));
+ smallest_mode = get_smallest_mode (smallest_mode,
+ TYPE_MODE (TREE_TYPE
(lhs)));
bool existed_p = false;
pair &live_range
= live_ranges->get_or_insert (lhs, &existed_p);
@@ -415,6 +427,9 @@ costs::compute_local_live_ranges (
biggest_mode
= get_biggest_mode (biggest_mode,
TYPE_MODE (TREE_TYPE (var)));
+ smallest_mode
+ = get_smallest_mode (smallest_mode,
+ TYPE_MODE (TREE_TYPE (var)));
bool existed_p = false;
pair &live_range
= live_ranges->get_or_insert (var, &existed_p);
@@ -445,6 +460,8 @@ costs::compute_local_live_ranges (
(*r).second = MAX (point, (*r).second);
biggest_mode = get_biggest_mode (
biggest_mode, TYPE_MODE (TREE_TYPE (arg)));
+ smallest_mode = get_smallest_mode (
+ smallest_mode, TYPE_MODE (TREE_TYPE (arg)));
}
}
else
@@ -464,8 +481,14 @@ costs::compute_local_live_ranges (
}
}
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
- GET_MODE_NAME (biggest_mode));
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
+ GET_MODE_NAME (biggest_mode));
+ dump_printf_loc (MSG_NOTE, vect_location, "Smallest mode = %s\n",
+ GET_MODE_NAME (smallest_mode));
+ }
+ if (smallest_mode_out)
+ *smallest_mode_out = smallest_mode;
return biggest_mode;
}
@@ -639,6 +662,25 @@ compute_estimated_lmul (loop_vec_info loop_vinfo,
machine_mode mode)
return 0;
}
+/* Compute LMUL based on the ratio of biggest to smallest type size.
+ This is used for RVV_CONV_DYNAMIC. */
+static int
+compute_lmul_from_conversion_ratio (machine_mode biggest_mode,
+ machine_mode smallest_mode)
+{
+ gcc_assert (GET_MODE_BITSIZE (biggest_mode).is_constant ());
+ gcc_assert (GET_MODE_BITSIZE (smallest_mode).is_constant ());
+
+ unsigned int biggest_size = GET_MODE_BITSIZE (biggest_mode).to_constant ();
+ unsigned int smallest_size = GET_MODE_BITSIZE (smallest_mode).to_constant ();
+
+ int lmul = biggest_size / smallest_size;
+ lmul = std::min (lmul, (int) RVV_M8);
+ lmul = std::max (lmul, (int) RVV_M1);
+
+ return lmul;
+}
+
/* Update the live ranges according PHI.
Loop:
@@ -825,56 +867,37 @@ costs::update_local_live_ranges (
}
}
-/* Compute the maximum live V_REGS. */
-bool
-costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
+/* Helper to compute live ranges, modes, and LMUL. */
+void
+costs::compute_live_ranges_and_lmul (loop_vec_info loop_vinfo,
+ hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
+ hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
+ machine_mode &biggest_mode, machine_mode &smallest_mode, int &lmul)
{
- /* Compute local program points.
- It's a fast and effective computation. */
- hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
compute_local_program_points (loop_vinfo, program_points_per_bb);
- /* Compute local live ranges. */
- hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
- machine_mode biggest_mode
- = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
- live_ranges_per_bb);
+ smallest_mode = TImode;
+ biggest_mode = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
+ live_ranges_per_bb, &smallest_mode);
- /* Update live ranges according to PHI. */
update_local_live_ranges (loop_vinfo, program_points_per_bb,
live_ranges_per_bb, &biggest_mode);
- int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
+ if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ lmul = compute_lmul_from_conversion_ratio (biggest_mode, smallest_mode);
+ else
+ lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
+
gcc_assert (lmul <= RVV_M8);
- /* TODO: We calculate the maximum live vars base on current STMTS
- sequence. We can support live range shrink if it can give us
- big improvement in the future. */
- if (lmul > RVV_M1)
- {
- if (!live_ranges_per_bb.is_empty ())
- {
- unsigned int max_nregs = 0;
- for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
- = live_ranges_per_bb.begin ();
- iter != live_ranges_per_bb.end (); ++iter)
- {
- basic_block bb = (*iter).first;
- unsigned int max_point
- = (*program_points_per_bb.get (bb)).length () + 1;
- if ((*iter).second.is_empty ())
- continue;
- /* We prefer larger LMUL unless it causes register spillings. */
- unsigned int nregs
- = max_number_of_live_regs (loop_vinfo, bb, (*iter).second,
- max_point, biggest_mode, lmul);
- if (nregs > max_nregs)
- max_nregs = nregs;
- }
- live_ranges_per_bb.empty ();
- if (max_nregs > V_REG_NUM)
- return true;
- }
- }
+}
+
+/* Helper to clean up live range data structures. */
+void
+costs::cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>>
+ &program_points_per_bb,
+ hash_map<basic_block, hash_map<tree, pair>>
+ &live_ranges_per_bb)
+{
if (!program_points_per_bb.is_empty ())
{
for (hash_map<basic_block, vec<stmt_point>>::iterator iter
@@ -887,7 +910,72 @@ costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
}
program_points_per_bb.empty ();
}
- return false;
+ live_ranges_per_bb.empty ();
+}
+
+/* Compute LMUL for RVV_CONV_DYNAMIC mode based on conversion ratio. */
+void
+costs::compute_conversion_dynamic_lmul (loop_vec_info loop_vinfo)
+{
+ hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
+ hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
+ machine_mode biggest_mode, smallest_mode;
+ int lmul;
+
+ compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb,
+ live_ranges_per_bb, biggest_mode,
+ smallest_mode, lmul);
+
+ /* Store the computed LMUL and biggest mode for later comparison
+ in cost model. */
+ m_computed_lmul_from_conv = lmul;
+ m_biggest_mode_for_conv = biggest_mode;
+
+ cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb);
+}
+
+/* Compute the maximum live V_REGS and check for unexpected spills. */
+bool
+costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
+{
+ hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
+ hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
+ machine_mode biggest_mode, smallest_mode;
+ int lmul;
+
+ compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb,
+ live_ranges_per_bb, biggest_mode,
+ smallest_mode, lmul);
+
+ /* TODO: We calculate the maximum live vars based on the current STMTS
+ sequence. We can support live range shrinking if it gives us a
+ big improvement in the future. */
+ bool has_spills = false;
+ if (lmul > RVV_M1 && !live_ranges_per_bb.is_empty ())
+ {
+ unsigned int max_nregs = 0;
+ for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
+ = live_ranges_per_bb.begin ();
+ iter != live_ranges_per_bb.end (); ++iter)
+ {
+ basic_block bb = (*iter).first;
+ unsigned int max_point
+ = (*program_points_per_bb.get (bb)).length () + 1;
+ if ((*iter).second.is_empty ())
+ continue;
+ /* We prefer larger LMUL unless it causes register spillings. */
+ unsigned int nregs
+ = max_number_of_live_regs (loop_vinfo, bb, (*iter).second,
+ max_point, biggest_mode, lmul);
+ if (nregs > max_nregs)
+ max_nregs = nregs;
+ }
+ if (max_nregs > V_REG_NUM)
+ has_spills = true;
+ }
+
+ cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb);
+ return has_spills;
}
costs::costs (vec_info *vinfo, bool costing_for_scalar)
@@ -937,6 +1025,8 @@ costs::record_potential_unexpected_spills (loop_vec_info
loop_vinfo)
if (!post_dom_available_p)
free_dominance_info (CDI_POST_DOMINATORS);
}
+ else if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ compute_conversion_dynamic_lmul (loop_vinfo);
}
/* Decide whether to use the unrolling heuristic described above
@@ -1033,6 +1123,50 @@ costs::better_main_loop_than_p (const vector_costs
*uncast_other) const
return other_prefer_unrolled;
}
}
+ else if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+ {
+ if (this->m_computed_lmul_from_conv > 0
+ && other->m_computed_lmul_from_conv > 0
+ && this->m_biggest_mode_for_conv != VOIDmode)
+ {
+ int this_vf = vect_vf_for_cost (this_loop_vinfo);
+ int other_vf = vect_vf_for_cost (other_loop_vinfo);
+
+ /* Get element size from the biggest mode. */
+ unsigned int element_bits
+ = GET_MODE_BITSIZE (this->m_biggest_mode_for_conv).to_constant ();
+
+ /* Estimate LMUL from VF * element_size / MIN_VLEN. */
+ int this_lmul = (this_vf * element_bits) / TARGET_MIN_VLEN;
+ int other_lmul = (other_vf * element_bits) / TARGET_MIN_VLEN;
+
+ /* Clamp to valid LMUL range. */
+ this_lmul = MAX (1, MIN (this_lmul, 8));
+ other_lmul = MAX (1, MIN (other_lmul, 8));
+
+ int target_lmul = this->m_computed_lmul_from_conv;
+
+ /* Prefer the LMUL that exactly matches our computed ratio. */
+ if (this_lmul == target_lmul && other_lmul != target_lmul)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Preferring LMUL=%d loop because it matches"
+ " conversion ratio (other LMUL=%d)\n",
+ this_lmul, other_lmul);
+ return true;
+ }
+ else if (this_lmul != target_lmul && other_lmul == target_lmul)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Preferring other LMUL=%d loop because it
matches"
+ " conversion ratio (this LMUL=%d)\n",
+ other_lmul, this_lmul);
+ return false;
+ }
+ }
+ }
else if (rvv_max_lmul == RVV_DYNAMIC)
{
if (other->m_has_unexpected_spills_p)
diff --git a/gcc/config/riscv/riscv-vector-costs.h
b/gcc/config/riscv/riscv-vector-costs.h
index b84ceb1d3cf..89f813c3d98 100644
--- a/gcc/config/riscv/riscv-vector-costs.h
+++ b/gcc/config/riscv/riscv-vector-costs.h
@@ -106,6 +106,11 @@ private:
bool m_has_unexpected_spills_p = false;
void record_potential_unexpected_spills (loop_vec_info);
+ /* For RVV_CONV_DYNAMIC mode, store the LMUL computed from conversion ratio
+ and the biggest mode used in the computation. */
+ int m_computed_lmul_from_conv = 0;
+ machine_mode m_biggest_mode_for_conv = VOIDmode;
+
void compute_local_program_points (vec_info *,
hash_map<basic_block, vec<stmt_point>> &);
void update_local_live_ranges (vec_info *,
@@ -114,9 +119,17 @@ private:
machine_mode *);
machine_mode compute_local_live_ranges
(loop_vec_info, const hash_map<basic_block, vec<stmt_point>> &,
- hash_map<basic_block, hash_map<tree, pair>> &);
-
+ hash_map<basic_block, hash_map<tree, pair>> &,
+ machine_mode * = nullptr);
+
+ void compute_live_ranges_and_lmul (loop_vec_info,
+ hash_map<basic_block, vec<stmt_point>> &,
+ hash_map<basic_block, hash_map<tree,
pair>> &,
+ machine_mode &, machine_mode &, int &);
+ void cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>> &,
+ hash_map<basic_block, hash_map<tree, pair>> &);
bool has_unexpected_spills_p (loop_vec_info);
+ void compute_conversion_dynamic_lmul (loop_vec_info);
bool need_additional_vector_vars_p (stmt_vec_info, slp_tree);
void adjust_vect_cost_per_loop (loop_vec_info);
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 452062c6500..de7730a8961 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -313,6 +313,9 @@ Enum(rvv_max_lmul) String(m8) Value(RVV_M8)
EnumValue
Enum(rvv_max_lmul) String(dynamic) Value(RVV_DYNAMIC)
+EnumValue
+Enum(rvv_max_lmul) String(conv-dynamic) Value(RVV_CONV_DYNAMIC)
+
mrvv-max-lmul=
Target RejectNegative Joined Enum(rvv_max_lmul) Var(rvv_max_lmul) Init(RVV_M1)
-mrvv-max-lmul=<string> Set the RVV LMUL of auto-vectorization.
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
new file mode 100644
index 00000000000..b07bd86f76e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
+
+void foo2x1 (short *restrict a, char *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo2x2 (int *restrict a, short *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo2x3 (long *restrict a, int *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo4x1 (int *restrict a, char *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo4x2 (long *restrict a, short *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo8x (long *restrict a, char *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+/* { dg-final { scan-assembler-times ",m2," 3 } } */
+/* { dg-final { scan-assembler-times ",m4," 2 } } */
+/* { dg-final { scan-assembler-times ",m8," 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
new file mode 100644
index 00000000000..c37e4dd63f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
+
+void foo2x1 (unsigned char *restrict a, unsigned short *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo2x2 (unsigned short *restrict a, unsigned int *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo2x3 (unsigned int *restrict a, unsigned long *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo4x1 (unsigned char *restrict a, unsigned int *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo4x2 (unsigned short *restrict a, unsigned long *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+void foo8x (unsigned char *restrict a, unsigned long *restrict b, int n)
+{
+ for (int i = 0; i < n; i++)
+ a[i] = b[i];
+}
+
+/* { dg-final { scan-assembler-times ",m1," 6 } } */
+/* { dg-final { scan-assembler-times ",m2," 3 } } */
+/* { dg-final { scan-assembler-times ",m4," 1 } } */
+/* { dg-final { scan-assembler-not ",mf2," } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
new file mode 100644
index 00000000000..7753a66cd96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
+
+int
+foo (const char *x, const char *y)
+{
+ int sum = 0;
+ for (int i = 0; i < 1024; i++)
+ sum += x[i] * y[i];
+ return sum;
+}
+
+/* One for the initial value, one for the reduction. */
+/* { dg-final { scan-assembler-times ",m4," 2 } } */
--
2.51.1