[PATCH] RISC-V: -mrvv-max-lmul=conv-dynamic [PR122846].

Robin Dapp Fri, 12 Dec 2025 06:23:20 -0800

Hi,

As discussed in the patchwork sync, this patch adds a dynamic LMUL mode
that sets the LMUL to the ratio of the largest to the smallest type size
in a loop, with the maximum being LMUL8.


This is supposed to imitate what other architectures implicitly do via
vec_unpack_hi/lo.  I have only done cursory testing, so more coverage
would obviously be preferred.

Regtested on rv64gcv_zvl512b.

Regards
 Robin

        PR target/122846

gcc/ChangeLog:

        * config/riscv/riscv-opts.h (enum rvv_max_lmul_enum): Add
        RVV_CONV_DYNAMIC.
        (TARGET_MAX_LMUL): Handle RVV_CONV_DYNAMIC.
        * config/riscv/riscv-string.cc (use_vector_stringop_p): Use
        LMUL1 for RVV_CONV_DYNAMIC.
        (expand_rawmemchr): Ditto.
        (expand_strcmp): Ditto.
        (check_vectorise_memory_operation): Ditto.
        * config/riscv/riscv-vector-costs.cc (get_smallest_mode):
        New function.
        (compute_lmul_from_conversion_ratio): Calculate LMUL from
        largest/smallest type.
        (costs::has_unexpected_spills_p): Split.
        (costs::compute_live_ranges_and_lmul): Compute smallest type and
        call new function.
        (costs::cleanup_live_range_data): New function.
        (costs::compute_conversion_dynamic_lmul): New function.
        (costs::record_potential_unexpected_spills): Use new function.
        (costs::better_main_loop_than_p): Allow appropriate LMUL.
        * config/riscv/riscv-vector-costs.h: Declare new functions and members.
        * config/riscv/riscv.opt: New option
        -mrvv-max-lmul=conv-dynamic.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c: New test.
        * gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c: New test.
        * gcc.target/riscv/rvv/autovec/pr122846.c: New test.
---
 gcc/config/riscv/riscv-opts.h                 |   7 +-
 gcc/config/riscv/riscv-string.cc              |  26 +-
 gcc/config/riscv/riscv-vector-costs.cc        | 226 ++++++++++++++----
 gcc/config/riscv/riscv-vector-costs.h         |  17 +-
 gcc/config/riscv/riscv.opt                    |   3 +
 .../riscv/rvv/autovec/dyn-lmul-conv-1.c       |  42 ++++
 .../riscv/rvv/autovec/dyn-lmul-conv-2.c       |  43 ++++
 .../gcc.target/riscv/rvv/autovec/pr122846.c   |  14 ++
 8 files changed, 320 insertions(+), 58 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c

diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 9b92a965e27..c6a09d59620 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -86,7 +86,9 @@ enum rvv_max_lmul_enum {
   RVV_M4 = 4,
   RVV_M8 = 8,
   /* For dynamic LMUL, we compare COST start with LMUL8.  */
-  RVV_DYNAMIC = 9
+  RVV_DYNAMIC = 9,
+  /* For dynamic LMUL based on conversions, set LMUL based on type size ratio.  */
+  RVV_CONV_DYNAMIC = 10
 };
 
 enum riscv_multilib_select_kind {
@@ -155,7 +157,8 @@ enum rvv_vector_bits_enum {
 
 /* The maximum LMUL according to user configuration.  */
 #define TARGET_MAX_LMUL                                                        
\
-  (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul)
+  (int) ((rvv_max_lmul == RVV_DYNAMIC || rvv_max_lmul == RVV_CONV_DYNAMIC) \
+        ? RVV_M8 : rvv_max_lmul)
 
 /* TLS types.  */
 enum riscv_tls_type {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index c5710e4c896..ac9b19213a0 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1089,13 +1089,17 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew,
   if (!TARGET_VECTOR || !(stringop_strategy & STRATEGY_VECTOR))
     return false;
 
+  int max_lmul = TARGET_MAX_LMUL;
+  if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+    max_lmul = RVV_M1;
+
   if (CONST_INT_P (length_in))
     {
       HOST_WIDE_INT length = INTVAL (length_in);
 
       /* If the VLEN and preferred LMUL allow the entire block to be copied in
         one go then no loop is needed.  */
-      if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL))
+      if (known_le (length, BYTES_PER_RISCV_VECTOR * max_lmul))
        {
          need_loop = false;
 
@@ -1130,10 +1134,10 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew,
          poly_int64 nunits;
 
          if (need_loop)
-           per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL;
+           per_iter = BYTES_PER_RISCV_VECTOR * max_lmul;
          else
            per_iter = length;
-         /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by
+         /* BYTES_PER_RISCV_VECTOR * MAX_LMUL may not be divisible by
             this potential_ew.  */
          if (!multiple_p (per_iter, potential_ew, &nunits))
            continue;
@@ -1164,7 +1168,7 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew,
                 pointless.
                 Still, by choosing a lower LMUL factor that still allows
                 an entire transfer, we can reduce register pressure.  */
-             for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1)
+             for (int lmul = 1; lmul < max_lmul; lmul <<= 1)
                if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul)
                    && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew,
                                   &mode_units)
@@ -1177,9 +1181,9 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew,
          if (vmode != VOIDmode)
            break;
 
-         /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be divisible
+         /* BYTES_PER_RISCV_VECTOR * MAX_LMUL will at least be divisible
             by potential_ew 1, so this should succeed eventually.  */
-         if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
+         if (multiple_p (BYTES_PER_RISCV_VECTOR * max_lmul,
                          potential_ew, &mode_units)
              && riscv_vector::get_vector_mode (elem_mode,
                                                mode_units).exists (&vmode))
@@ -1195,7 +1199,7 @@ use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew,
     }
   else
     {
-      gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode));
+      gcc_assert (get_lmul_mode (QImode, max_lmul).exists (&vmode));
     }
 
   /* A memcpy libcall in the worst case takes 3 instructions to prepare the
@@ -1356,6 +1360,8 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle,
 
   unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
   int lmul = TARGET_MAX_LMUL;
+  if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+    lmul = RVV_M1;
   poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
 
   machine_mode vmode;
@@ -1455,6 +1461,8 @@ expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes,
   machine_mode mode = E_QImode;
   unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
   int lmul = TARGET_MAX_LMUL;
+  if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+    lmul = RVV_M1;
   poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
 
   machine_mode vmode;
@@ -1606,7 +1614,9 @@ check_vectorise_memory_operation (rtx length_in, HOST_WIDE_INT &lmul_out)
   if (rvv_max_lmul != RVV_DYNAMIC)
     {
       lmul_out = TARGET_MAX_LMUL;
-      return (length <= ((TARGET_MAX_LMUL * TARGET_MIN_VLEN) / 8));
+      if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+       lmul_out = RVV_M1;
+      return (length <= ((lmul_out * TARGET_MIN_VLEN) / 8));
     }
 
   /* Find smallest lmul large enough for entire op.  */
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 27ced61e815..41b4e4860b0 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -258,6 +258,14 @@ get_biggest_mode (machine_mode mode1, machine_mode mode2)
   return mode1_size >= mode2_size ? mode1 : mode2;
 }
 
+static machine_mode
+get_smallest_mode (machine_mode mode1, machine_mode mode2)
+{
+  unsigned int mode1_size = GET_MODE_BITSIZE (mode1).to_constant ();
+  unsigned int mode2_size = GET_MODE_BITSIZE (mode2).to_constant ();
+  return mode1_size <= mode2_size ? mode1 : mode2;
+}
+
 /* Return true if OP is invariant.  */
 
 static bool
@@ -361,9 +369,11 @@ machine_mode
 costs::compute_local_live_ranges (
   loop_vec_info loop_vinfo,
   const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
-  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
+  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
+  machine_mode *smallest_mode_out)
 {
   machine_mode biggest_mode = QImode;
+  machine_mode smallest_mode = TImode;
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   if (!program_points_per_bb.is_empty ())
     {
@@ -396,6 +406,8 @@ costs::compute_local_live_ranges (
                {
                  biggest_mode = get_biggest_mode (biggest_mode,
                                                   TYPE_MODE (TREE_TYPE (lhs)));
+                 smallest_mode = get_smallest_mode (smallest_mode,
+                                                    TYPE_MODE (TREE_TYPE (lhs)));
                  bool existed_p = false;
                  pair &live_range
                    = live_ranges->get_or_insert (lhs, &existed_p);
@@ -415,6 +427,9 @@ costs::compute_local_live_ranges (
                      biggest_mode
                        = get_biggest_mode (biggest_mode,
                                            TYPE_MODE (TREE_TYPE (var)));
+                     smallest_mode
+                       = get_smallest_mode (smallest_mode,
+                                            TYPE_MODE (TREE_TYPE (var)));
                      bool existed_p = false;
                      pair &live_range
                        = live_ranges->get_or_insert (var, &existed_p);
@@ -445,6 +460,8 @@ costs::compute_local_live_ranges (
                                  (*r).second = MAX (point, (*r).second);
                                  biggest_mode = get_biggest_mode (
                                    biggest_mode, TYPE_MODE (TREE_TYPE (arg)));
+                                 smallest_mode = get_smallest_mode (
+                                   smallest_mode, TYPE_MODE (TREE_TYPE (arg)));
                                }
                            }
                          else
@@ -464,8 +481,14 @@ costs::compute_local_live_ranges (
        }
     }
   if (dump_enabled_p ())
-    dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
-                    GET_MODE_NAME (biggest_mode));
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "Biggest mode = %s\n",
+                      GET_MODE_NAME (biggest_mode));
+      dump_printf_loc (MSG_NOTE, vect_location, "Smallest mode = %s\n",
+                      GET_MODE_NAME (smallest_mode));
+    }
+  if (smallest_mode_out)
+    *smallest_mode_out = smallest_mode;
   return biggest_mode;
 }
 
@@ -639,6 +662,25 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
   return 0;
 }
 
+/* Compute LMUL based on the ratio of biggest to smallest type size.
+   This is used for RVV_CONV_DYNAMIC.  */
+static int
+compute_lmul_from_conversion_ratio (machine_mode biggest_mode,
+                                   machine_mode smallest_mode)
+{
+  gcc_assert (GET_MODE_BITSIZE (biggest_mode).is_constant ());
+  gcc_assert (GET_MODE_BITSIZE (smallest_mode).is_constant ());
+
+  unsigned int biggest_size = GET_MODE_BITSIZE (biggest_mode).to_constant ();
+  unsigned int smallest_size = GET_MODE_BITSIZE (smallest_mode).to_constant ();
+
+  int lmul = biggest_size / smallest_size;
+  lmul = std::min (lmul, (int) RVV_M8);
+  lmul = std::max (lmul, (int) RVV_M1);
+
+  return lmul;
+}
+
 /* Update the live ranges according PHI.
 
    Loop:
@@ -825,56 +867,37 @@ costs::update_local_live_ranges (
     }
 }
 
-/* Compute the maximum live V_REGS.  */
-bool
-costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
+/* Helper to compute live ranges, modes, and LMUL.  */
+void
+costs::compute_live_ranges_and_lmul (loop_vec_info loop_vinfo,
+  hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
+  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
+  machine_mode &biggest_mode, machine_mode &smallest_mode, int &lmul)
 {
-  /* Compute local program points.
-     It's a fast and effective computation.  */
-  hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
   compute_local_program_points (loop_vinfo, program_points_per_bb);
 
-  /* Compute local live ranges.  */
-  hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
-  machine_mode biggest_mode
-    = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
-                                live_ranges_per_bb);
+  smallest_mode = TImode;
+  biggest_mode = compute_local_live_ranges (loop_vinfo, program_points_per_bb,
+                                           live_ranges_per_bb, &smallest_mode);
 
-  /* Update live ranges according to PHI.  */
   update_local_live_ranges (loop_vinfo, program_points_per_bb,
                            live_ranges_per_bb, &biggest_mode);
 
-  int lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
+  if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+    lmul = compute_lmul_from_conversion_ratio (biggest_mode, smallest_mode);
+  else
+    lmul = compute_estimated_lmul (loop_vinfo, biggest_mode);
+
   gcc_assert (lmul <= RVV_M8);
-  /* TODO: We calculate the maximum live vars base on current STMTS
-     sequence.  We can support live range shrink if it can give us
-     big improvement in the future.  */
-  if (lmul > RVV_M1)
-    {
-      if (!live_ranges_per_bb.is_empty ())
-       {
-         unsigned int max_nregs = 0;
-         for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
-              = live_ranges_per_bb.begin ();
-              iter != live_ranges_per_bb.end (); ++iter)
-           {
-             basic_block bb = (*iter).first;
-             unsigned int max_point
-               = (*program_points_per_bb.get (bb)).length () + 1;
-             if ((*iter).second.is_empty ())
-               continue;
-             /* We prefer larger LMUL unless it causes register spillings. */
-             unsigned int nregs
-               = max_number_of_live_regs (loop_vinfo, bb, (*iter).second,
-                                          max_point, biggest_mode, lmul);
-             if (nregs > max_nregs)
-               max_nregs = nregs;
-           }
-         live_ranges_per_bb.empty ();
-         if (max_nregs > V_REG_NUM)
-           return true;
-       }
-    }
+}
+
+/* Helper to clean up live range data structures.  */
+void
+costs::cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>>
+                               &program_points_per_bb,
+                               hash_map<basic_block, hash_map<tree, pair>>
+                               &live_ranges_per_bb)
+{
   if (!program_points_per_bb.is_empty ())
     {
       for (hash_map<basic_block, vec<stmt_point>>::iterator iter
@@ -887,7 +910,72 @@ costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
        }
       program_points_per_bb.empty ();
     }
-  return false;
+  live_ranges_per_bb.empty ();
+}
+
+/* Compute LMUL for RVV_CONV_DYNAMIC mode based on conversion ratio.  */
+void
+costs::compute_conversion_dynamic_lmul (loop_vec_info loop_vinfo)
+{
+  hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
+  hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
+  machine_mode biggest_mode, smallest_mode;
+  int lmul;
+
+  compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb,
+                               live_ranges_per_bb, biggest_mode,
+                               smallest_mode, lmul);
+
+  /* Store the computed LMUL and biggest mode for later comparison
+     in cost model.  */
+  m_computed_lmul_from_conv = lmul;
+  m_biggest_mode_for_conv = biggest_mode;
+
+  cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb);
+}
+
+/* Compute the maximum live V_REGS and check for unexpected spills.  */
+bool
+costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
+{
+  hash_map<basic_block, vec<stmt_point>> program_points_per_bb;
+  hash_map<basic_block, hash_map<tree, pair>> live_ranges_per_bb;
+  machine_mode biggest_mode, smallest_mode;
+  int lmul;
+
+  compute_live_ranges_and_lmul (loop_vinfo, program_points_per_bb,
+                               live_ranges_per_bb, biggest_mode,
+                               smallest_mode, lmul);
+
+  /* TODO: We calculate the maximum live vars based on current STMTS
+     sequence.  We can support live range shrink if it can give us
+     big improvement in the future.  */
+  bool has_spills = false;
+  if (lmul > RVV_M1 && !live_ranges_per_bb.is_empty ())
+    {
+      unsigned int max_nregs = 0;
+      for (hash_map<basic_block, hash_map<tree, pair>>::iterator iter
+          = live_ranges_per_bb.begin ();
+          iter != live_ranges_per_bb.end (); ++iter)
+       {
+         basic_block bb = (*iter).first;
+         unsigned int max_point
+           = (*program_points_per_bb.get (bb)).length () + 1;
+         if ((*iter).second.is_empty ())
+           continue;
+         /* We prefer larger LMUL unless it causes register spillings.  */
+         unsigned int nregs
+           = max_number_of_live_regs (loop_vinfo, bb, (*iter).second,
+                                      max_point, biggest_mode, lmul);
+         if (nregs > max_nregs)
+           max_nregs = nregs;
+       }
+      if (max_nregs > V_REG_NUM)
+       has_spills = true;
+    }
+
+  cleanup_live_range_data (program_points_per_bb, live_ranges_per_bb);
+  return has_spills;
 }
 
 costs::costs (vec_info *vinfo, bool costing_for_scalar)
@@ -937,6 +1025,8 @@ costs::record_potential_unexpected_spills (loop_vec_info loop_vinfo)
       if (!post_dom_available_p)
        free_dominance_info (CDI_POST_DOMINATORS);
     }
+  else if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+    compute_conversion_dynamic_lmul (loop_vinfo);
 }
 
 /* Decide whether to use the unrolling heuristic described above
@@ -1033,6 +1123,50 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
          return other_prefer_unrolled;
        }
     }
+  else if (rvv_max_lmul == RVV_CONV_DYNAMIC)
+    {
+      if (this->m_computed_lmul_from_conv > 0
+         && other->m_computed_lmul_from_conv > 0
+         && this->m_biggest_mode_for_conv != VOIDmode)
+       {
+         int this_vf = vect_vf_for_cost (this_loop_vinfo);
+         int other_vf = vect_vf_for_cost (other_loop_vinfo);
+
+         /* Get element size from the biggest mode.  */
+         unsigned int element_bits
+           = GET_MODE_BITSIZE (this->m_biggest_mode_for_conv).to_constant ();
+
+         /* Estimate LMUL from VF * element_size / MIN_VLEN.  */
+         int this_lmul = (this_vf * element_bits) / TARGET_MIN_VLEN;
+         int other_lmul = (other_vf * element_bits) / TARGET_MIN_VLEN;
+
+         /* Clamp to valid LMUL range.  */
+         this_lmul = MAX (1, MIN (this_lmul, 8));
+         other_lmul = MAX (1, MIN (other_lmul, 8));
+
+         int target_lmul = this->m_computed_lmul_from_conv;
+
+         /* Prefer the LMUL that exactly matches our computed ratio.  */
+         if (this_lmul == target_lmul && other_lmul != target_lmul)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "Preferring LMUL=%d loop because it matches"
+                                " conversion ratio (other LMUL=%d)\n",
+                                this_lmul, other_lmul);
+             return true;
+           }
+         else if (this_lmul != target_lmul && other_lmul == target_lmul)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "Preferring other LMUL=%d loop because it matches"
+                                " conversion ratio (this LMUL=%d)\n",
+                                other_lmul, this_lmul);
+             return false;
+           }
+       }
+    }
   else if (rvv_max_lmul == RVV_DYNAMIC)
     {
       if (other->m_has_unexpected_spills_p)
diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h
index b84ceb1d3cf..89f813c3d98 100644
--- a/gcc/config/riscv/riscv-vector-costs.h
+++ b/gcc/config/riscv/riscv-vector-costs.h
@@ -106,6 +106,11 @@ private:
   bool m_has_unexpected_spills_p = false;
   void record_potential_unexpected_spills (loop_vec_info);
 
+  /* For RVV_CONV_DYNAMIC mode, store the LMUL computed from conversion ratio
+     and the biggest mode used in the computation.  */
+  int m_computed_lmul_from_conv = 0;
+  machine_mode m_biggest_mode_for_conv = VOIDmode;
+
   void compute_local_program_points (vec_info *,
                                     hash_map<basic_block, vec<stmt_point>> &);
   void update_local_live_ranges (vec_info *,
@@ -114,9 +119,17 @@ private:
                                 machine_mode *);
   machine_mode compute_local_live_ranges
     (loop_vec_info, const hash_map<basic_block, vec<stmt_point>> &,
-     hash_map<basic_block, hash_map<tree, pair>> &);
-
+     hash_map<basic_block, hash_map<tree, pair>> &,
+     machine_mode * = nullptr);
+
+  void compute_live_ranges_and_lmul (loop_vec_info,
+                                    hash_map<basic_block, vec<stmt_point>> &,
+                                    hash_map<basic_block, hash_map<tree, pair>> &,
+                                    machine_mode &, machine_mode &, int &);
+  void cleanup_live_range_data (hash_map<basic_block, vec<stmt_point>> &,
+                               hash_map<basic_block, hash_map<tree, pair>> &);
   bool has_unexpected_spills_p (loop_vec_info);
+  void compute_conversion_dynamic_lmul (loop_vec_info);
   bool need_additional_vector_vars_p (stmt_vec_info, slp_tree);
 
   void adjust_vect_cost_per_loop (loop_vec_info);
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 452062c6500..de7730a8961 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -313,6 +313,9 @@ Enum(rvv_max_lmul) String(m8) Value(RVV_M8)
 EnumValue
 Enum(rvv_max_lmul) String(dynamic) Value(RVV_DYNAMIC)
 
+EnumValue
+Enum(rvv_max_lmul) String(conv-dynamic) Value(RVV_CONV_DYNAMIC)
+
 mrvv-max-lmul=
 Target RejectNegative Joined Enum(rvv_max_lmul) Var(rvv_max_lmul) Init(RVV_M1)
 -mrvv-max-lmul=<string>        Set the RVV LMUL of auto-vectorization.
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
new file mode 100644
index 00000000000..b07bd86f76e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
+
+void foo2x1 (short *restrict a, char *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo2x2 (int *restrict a, short *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo2x3 (long *restrict a, int *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo4x1 (int *restrict a, char *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo4x2 (long *restrict a, short *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo8x (long *restrict a, char *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+/* { dg-final { scan-assembler-times ",m2," 3 } } */
+/* { dg-final { scan-assembler-times ",m4," 2 } } */
+/* { dg-final { scan-assembler-times ",m8," 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
new file mode 100644
index 00000000000..c37e4dd63f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/dyn-lmul-conv-2.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
+
+void foo2x1 (unsigned char *restrict a, unsigned short *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo2x2 (unsigned short *restrict a, unsigned int *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo2x3 (unsigned int *restrict a, unsigned long *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo4x1 (unsigned char *restrict a, unsigned int *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo4x2 (unsigned short *restrict a, unsigned long *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+void foo8x (unsigned char *restrict a, unsigned long *restrict b, int n)
+{
+  for (int i = 0; i < n; i++)
+    a[i] = b[i];
+}
+
+/* { dg-final { scan-assembler-times ",m1," 6 } } */
+/* { dg-final { scan-assembler-times ",m2," 3 } } */
+/* { dg-final { scan-assembler-times ",m4," 1 } } */
+/* { dg-final { scan-assembler-not ",mf2," } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
new file mode 100644
index 00000000000..7753a66cd96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122846.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -mrvv-max-lmul=conv-dynamic" } */
+
+int
+foo (const char *x, const char *y)
+{
+  int sum = 0;
+  for (int i = 0; i < 1024; i++)
+    sum += x[i] * y[i];
+  return sum;
+}
+
+/* One for the initial value, one for the reduction.  */
+/* { dg-final { scan-assembler-times ",m4," 2 } } */
-- 
2.51.1

[PATCH] RISC-V: -mrvv-max-lmul=conv-dynamic [PR122846].

Reply via email to