Applies after the recent 9 patch series:
"RISC-V: Improve const vector costing and expansion"
https://inbox.sourceware.org/gcc-patches/20240822194705.2789364-1-patr...@rivosinc.com/T/#t

This isn't functional due to RTX hash collisions. It was incredibly useful and
helped me catch a few tricky bugs like:
"RISC-V: Handle 0.0 floating point pattern costing"

Current flow is susceptible to hash collisions.

Ideal flow would be:
Costing: Insert into hashmap<rtx, vec<(const_rtx, enum)>>
Expand: Check for membership in hashmap
 -> Not in hashmap: ignore, this wasn't costed
 -> In hashmap: Iterate over vec
    -> if RTX not in hashmap: Ignore, this wasn't costed (hash collision)
    -> if RTX in hashmap: Assert enum is expected

Example of hash collision:
hash -663464470:
(const_vector:RVVM4DI repeat [
        (const_int 8589934593 [0x200000001])
    ])
hash -663464470:
(const_vector:RVVM4SF repeat [
        (const_double:SF 1.0e+0 [0x0.8p+1])
        (const_double:SF 2.0e+0 [0x0.8p+2])
    ])

Segher pointed out that collisions are inevitible (~80k 32 bit hashes
have a >50% chance of containing a collision)

If this is worth adding then these are the next questions:
* How heavy-weight is it to store a copy of every costed
  const RTX vector (and ideally other costed expressions later).
* Does this belong in release or gated behind rtx checking?

Signed-off-by: Patrick O'Neill <patr...@rivosinc.com>
---
 gcc/config/riscv/riscv-v.cc | 29 +++++++++++++++++++++++++++++
 gcc/config/riscv/riscv-v.h  | 14 ++++++++++++++
 gcc/config/riscv/riscv.cc   | 27 ++++++++++++++++++++++++---
 3 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 6ba4b6291bb..db43312f59f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1164,6 +1164,10 @@ expand_vector_init_trailing_same_elem (rtx target,
 static void
 expand_const_vector (rtx target, rtx src)
 {
+  riscv_const_expect_p* expected_pattern = NULL;
+  if (EXPECTED_CONST_PATTERN)
+    expected_pattern = EXPECTED_CONST_PATTERN->get (src);
+
   machine_mode mode = GET_MODE (target);
   rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
   rtx elt;
@@ -1171,6 +1175,8 @@ expand_const_vector (rtx target, rtx src)
     {
       if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
        {
+         if (expected_pattern)
+           gcc_assert (*expected_pattern == RVV_DUPLICATE_BOOL);
          gcc_assert (rtx_equal_p (elt, const0_rtx)
                      || rtx_equal_p (elt, const1_rtx));
          rtx ops[] = {result, src};
@@ -1180,11 +1186,16 @@ expand_const_vector (rtx target, rtx src)
         we use vmv.v.i instruction.  */
       else if (satisfies_constraint_vi (src) || satisfies_constraint_Wc0 (src))
        {
+         if (expected_pattern)
+           gcc_assert (*expected_pattern == RVV_DUPLICATE_VMV_VI);
          rtx ops[] = {result, src};
          emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
        }
       else
        {
+         if (expected_pattern)
+           gcc_assert (*expected_pattern == RVV_DUPLICATE_INT_FP);
+
          /* Emit vec_duplicate<mode> split pattern before RA so that
             we could have a better optimization opportunity in LICM
             which will hoist vmv.v.x outside the loop and in fwprop && combine
@@ -1221,6 +1232,8 @@ expand_const_vector (rtx target, rtx src)
   rtx base, step;
   if (const_vec_series_p (src, &base, &step))
     {
+      if (expected_pattern)
+       gcc_assert (*expected_pattern == RVV_SERIES);
       expand_vec_series (result, base, step);

       if (result != target)
@@ -1314,6 +1327,8 @@ expand_const_vector (rtx target, rtx src)
       gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
       if (builder.single_step_npatterns_p ())
        {
+         if (expected_pattern)
+           gcc_assert (*expected_pattern == RVV_PATTERN_SINGLE_STEP);
          /* Describe the case by choosing NPATTERNS = 4 as an example.  */
          insn_code icode;

@@ -1453,6 +1468,8 @@ expand_const_vector (rtx target, rtx src)
        }
       else if (builder.interleaved_stepped_npatterns_p ())
        {
+         if (expected_pattern)
+           gcc_assert (*expected_pattern == RVV_PATTERN_INTERLEAVED);
          rtx base1 = builder.elt (0);
          rtx base2 = builder.elt (1);
          poly_int64 step1
@@ -1555,6 +1572,11 @@ expand_const_vector (rtx target, rtx src)

   if (emit_catch_all_pattern)
     {
+      /* Ensure the vector cost emitted by riscv_const_insns expected this
+        pattern to be handled by the catch all pattern.  */
+      if (expected_pattern)
+       gcc_assert (*expected_pattern == RVV_CATCH_ALL);
+
       int nelts = XVECLEN (src, 0);

       /* Optimize trailing same elements sequence:
@@ -1566,6 +1588,13 @@ expand_const_vector (rtx target, rtx src)
           to/reading from the stack to initialize vectors.  */
        expand_vector_init_insert_elems (result, builder, nelts);
     }
+  else
+    {
+      /* Ensure the vector cost emitted by riscv_const_insns expected this
+        pattern to be handled by an optimized pattern.  */
+      if (expected_pattern)
+       gcc_assert (*expected_pattern != RVV_CATCH_ALL);
+    }

   if (result != target)
     emit_move_insn (target, result);
diff --git a/gcc/config/riscv/riscv-v.h b/gcc/config/riscv/riscv-v.h
index 4635b5415c7..3e62334b890 100644
--- a/gcc/config/riscv/riscv-v.h
+++ b/gcc/config/riscv/riscv-v.h
@@ -22,8 +22,22 @@
 #ifndef GCC_RISCV_V_H
 #define GCC_RISCV_V_H

+#include "hash-map.h"
 #include "rtx-vector-builder.h"

+enum riscv_const_expect_p
+{
+  RVV_DUPLICATE_BOOL,
+  RVV_DUPLICATE_VMV_VI,
+  RVV_DUPLICATE_INT_FP,
+  RVV_SERIES,
+  RVV_PATTERN_SINGLE_STEP,
+  RVV_PATTERN_INTERLEAVED,
+  RVV_CATCH_ALL,
+};
+
+extern hash_map<const_rtx, enum riscv_const_expect_p> *EXPECTED_CONST_PATTERN;
+
 using namespace riscv_vector;

 namespace riscv_vector {
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d1b8c27f4ac..8b752afeae7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -681,6 +681,10 @@ static const struct riscv_tune_info 
riscv_tune_info_table[] = {
    function.  */
 static bool riscv_save_frame_pointer;

+/* Global variable used in riscv-v.cc to ensure accurate costs are emitted
+   for constant vectors.  */
+hash_map<const_rtx, enum riscv_const_expect_p> *EXPECTED_CONST_PATTERN = NULL;
+
 typedef enum
 {
   PUSH_IDX = 0,
@@ -2131,6 +2135,11 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
       return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
     case CONST_VECTOR:
       {
+       /* Used to assert we aren't mislabeling optimized/fallthrough
+          patterns and are emitting accurate costs.  */
+       if (!EXPECTED_CONST_PATTERN)
+         EXPECTED_CONST_PATTERN = new hash_map<const_rtx, 
riscv_const_expect_p>;
+
        /* TODO: This is not accurate, we will need to
           adapt the COST of CONST_VECTOR in the future
           for the following cases:
@@ -2148,8 +2157,11 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
            if (const_vec_duplicate_p (x, &elt))
              {
                if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
-                 /* Duplicate values of 0/1 can be emitted using vmv.v.i.  */
-                 return 1;
+                 {
+                   EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_BOOL);
+                   /* Duplicate values of 0/1 can be emitted using vmv.v.i.  */
+                   return 1;
+                 }

                /* We don't allow CONST_VECTOR for DI vector on RV32
                   system since the ELT constant value can not held
@@ -2162,13 +2174,17 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
                /* Constants in range -16 ~ 15 integer or 0.0 floating-point
                   can be emitted using vmv.v.i.  */
                if (satisfies_constraint_vi (x) || satisfies_constraint_Wc0 (x))
-                 return 1;
+                 {
+                   EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_VMV_VI);
+                   return 1;
+                 }

                /* Any int/FP constants can always be broadcast from a
                   scalar register.  Loading of a floating-point
                   constant incurs a literal-pool access.  Allow this in
                   order to increase vectorization possibilities.  */
                int n = riscv_const_insns (elt, allow_new_pseudos);
+               EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_INT_FP);
                if (CONST_DOUBLE_P (elt))
                    return 1 + 4; /* vfmv.v.f + memory access.  */
                else
@@ -2186,6 +2202,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
            rtx base, step;
            if (const_vec_series_p (x, &base, &step))
              {
+               EXPECTED_CONST_PATTERN->put (x, RVV_SERIES);
                /* This cost is not accurate, we will need to adapt the COST
                   accurately according to BASE && STEP.  */
                return 1;
@@ -2216,6 +2233,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)

                if (builder.single_step_npatterns_p ())
                  {
+                   EXPECTED_CONST_PATTERN->put (x, RVV_PATTERN_SINGLE_STEP);
                    if (builder.npatterns_all_equal_p ())
                      {
                        /* TODO: This cost is not accurate.  */
@@ -2229,6 +2247,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
                  }
                else if (builder.interleaved_stepped_npatterns_p ())
                  {
+                   EXPECTED_CONST_PATTERN->put (x, RVV_PATTERN_INTERLEAVED);
                    /* TODO: This cost is not accurate.  */
                    return 1;
                  }
@@ -2256,6 +2275,8 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
              /* Our vslide1up/down insn def does not handle HF.  */
              return 0;

+           EXPECTED_CONST_PATTERN->put (x, RVV_CATCH_ALL);
+
            /* We already checked for a fully const vector above.  Calculate
               the number of leading/trailing elements covered by the splat.  */
            int leading_ndups = 1;
--
2.34.1

Reply via email to