Applies after the recent 9 patch series: "RISC-V: Improve const vector costing and expansion" https://inbox.sourceware.org/gcc-patches/20240822194705.2789364-1-patr...@rivosinc.com/T/#t
This isn't functional due to RTX hash collisions. It was incredibly useful and helped me catch a few tricky bugs like: "RISC-V: Handle 0.0 floating point pattern costing" Current flow is susceptible to hash collisions. Ideal flow would be: Costing: Insert into hashmap<rtx, vec<(const_rtx, enum)>> Expand: Check for membership in hashmap -> Not in hashmap: ignore, this wasn't costed -> In hashmap: Iterate over vec -> if RTX not in hashmap: Ignore, this wasn't costed (hash collision) -> if RTX in hashmap: Assert enum is expected Example of hash collision: hash -663464470: (const_vector:RVVM4DI repeat [ (const_int 8589934593 [0x200000001]) ]) hash -663464470: (const_vector:RVVM4SF repeat [ (const_double:SF 1.0e+0 [0x0.8p+1]) (const_double:SF 2.0e+0 [0x0.8p+2]) ]) Segher pointed out that collisions are inevitible (~80k 32 bit hashes have a >50% chance of containing a collision) If this is worth adding then these are the next questions: * How heavy-weight is it to store a copy of every costed const RTX vector (and ideally other costed expressions later). * Does this belong in release or gated behind rtx checking? Signed-off-by: Patrick O'Neill <patr...@rivosinc.com> --- gcc/config/riscv/riscv-v.cc | 29 +++++++++++++++++++++++++++++ gcc/config/riscv/riscv-v.h | 14 ++++++++++++++ gcc/config/riscv/riscv.cc | 27 ++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 6ba4b6291bb..db43312f59f 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1164,6 +1164,10 @@ expand_vector_init_trailing_same_elem (rtx target, static void expand_const_vector (rtx target, rtx src) { + riscv_const_expect_p* expected_pattern = NULL; + if (EXPECTED_CONST_PATTERN) + expected_pattern = EXPECTED_CONST_PATTERN->get (src); + machine_mode mode = GET_MODE (target); rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode); rtx elt; @@ -1171,6 +1175,8 @@ expand_const_vector (rtx target, rtx src) { if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) { + if (expected_pattern) + gcc_assert (*expected_pattern == RVV_DUPLICATE_BOOL); gcc_assert (rtx_equal_p (elt, const0_rtx) || rtx_equal_p (elt, const1_rtx)); rtx ops[] = {result, src}; @@ -1180,11 +1186,16 @@ expand_const_vector (rtx target, rtx src) we use vmv.v.i instruction. */ else if (satisfies_constraint_vi (src) || satisfies_constraint_Wc0 (src)) { + if (expected_pattern) + gcc_assert (*expected_pattern == RVV_DUPLICATE_VMV_VI); rtx ops[] = {result, src}; emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops); } else { + if (expected_pattern) + gcc_assert (*expected_pattern == RVV_DUPLICATE_INT_FP); + /* Emit vec_duplicate<mode> split pattern before RA so that we could have a better optimization opportunity in LICM which will hoist vmv.v.x outside the loop and in fwprop && combine @@ -1221,6 +1232,8 @@ expand_const_vector (rtx target, rtx src) rtx base, step; if (const_vec_series_p (src, &base, &step)) { + if (expected_pattern) + gcc_assert (*expected_pattern == RVV_SERIES); expand_vec_series (result, base, step); if (result != target) @@ -1314,6 +1327,8 @@ expand_const_vector (rtx target, rtx src) gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); if (builder.single_step_npatterns_p ()) { + if (expected_pattern) + gcc_assert (*expected_pattern == RVV_PATTERN_SINGLE_STEP); /* Describe the case by choosing NPATTERNS = 4 as an example. */ insn_code icode; @@ -1453,6 +1468,8 @@ expand_const_vector (rtx target, rtx src) } else if (builder.interleaved_stepped_npatterns_p ()) { + if (expected_pattern) + gcc_assert (*expected_pattern == RVV_PATTERN_INTERLEAVED); rtx base1 = builder.elt (0); rtx base2 = builder.elt (1); poly_int64 step1 @@ -1555,6 +1572,11 @@ expand_const_vector (rtx target, rtx src) if (emit_catch_all_pattern) { + /* Ensure the vector cost emitted by riscv_const_insns expected this + pattern to be handled by the catch all pattern. */ + if (expected_pattern) + gcc_assert (*expected_pattern == RVV_CATCH_ALL); + int nelts = XVECLEN (src, 0); /* Optimize trailing same elements sequence: @@ -1566,6 +1588,13 @@ expand_const_vector (rtx target, rtx src) to/reading from the stack to initialize vectors. */ expand_vector_init_insert_elems (result, builder, nelts); } + else + { + /* Ensure the vector cost emitted by riscv_const_insns expected this + pattern to be handled by an optimized pattern. */ + if (expected_pattern) + gcc_assert (*expected_pattern != RVV_CATCH_ALL); + } if (result != target) emit_move_insn (target, result); diff --git a/gcc/config/riscv/riscv-v.h b/gcc/config/riscv/riscv-v.h index 4635b5415c7..3e62334b890 100644 --- a/gcc/config/riscv/riscv-v.h +++ b/gcc/config/riscv/riscv-v.h @@ -22,8 +22,22 @@ #ifndef GCC_RISCV_V_H #define GCC_RISCV_V_H +#include "hash-map.h" #include "rtx-vector-builder.h" +enum riscv_const_expect_p +{ + RVV_DUPLICATE_BOOL, + RVV_DUPLICATE_VMV_VI, + RVV_DUPLICATE_INT_FP, + RVV_SERIES, + RVV_PATTERN_SINGLE_STEP, + RVV_PATTERN_INTERLEAVED, + RVV_CATCH_ALL, +}; + +extern hash_map<const_rtx, enum riscv_const_expect_p> *EXPECTED_CONST_PATTERN; + using namespace riscv_vector; namespace riscv_vector { diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d1b8c27f4ac..8b752afeae7 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -681,6 +681,10 @@ static const struct riscv_tune_info riscv_tune_info_table[] = { function. */ static bool riscv_save_frame_pointer; +/* Global variable used in riscv-v.cc to ensure accurate costs are emitted + for constant vectors. */ +hash_map<const_rtx, enum riscv_const_expect_p> *EXPECTED_CONST_PATTERN = NULL; + typedef enum { PUSH_IDX = 0, @@ -2131,6 +2135,11 @@ riscv_const_insns (rtx x, bool allow_new_pseudos) return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; case CONST_VECTOR: { + /* Used to assert we aren't mislabeling optimized/fallthrough + patterns and are emitting accurate costs. */ + if (!EXPECTED_CONST_PATTERN) + EXPECTED_CONST_PATTERN = new hash_map<const_rtx, riscv_const_expect_p>; + /* TODO: This is not accurate, we will need to adapt the COST of CONST_VECTOR in the future for the following cases: @@ -2148,8 +2157,11 @@ riscv_const_insns (rtx x, bool allow_new_pseudos) if (const_vec_duplicate_p (x, &elt)) { if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) - /* Duplicate values of 0/1 can be emitted using vmv.v.i. */ - return 1; + { + EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_BOOL); + /* Duplicate values of 0/1 can be emitted using vmv.v.i. */ + return 1; + } /* We don't allow CONST_VECTOR for DI vector on RV32 system since the ELT constant value can not held @@ -2162,13 +2174,17 @@ riscv_const_insns (rtx x, bool allow_new_pseudos) /* Constants in range -16 ~ 15 integer or 0.0 floating-point can be emitted using vmv.v.i. */ if (satisfies_constraint_vi (x) || satisfies_constraint_Wc0 (x)) - return 1; + { + EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_VMV_VI); + return 1; + } /* Any int/FP constants can always be broadcast from a scalar register. Loading of a floating-point constant incurs a literal-pool access. Allow this in order to increase vectorization possibilities. */ int n = riscv_const_insns (elt, allow_new_pseudos); + EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_INT_FP); if (CONST_DOUBLE_P (elt)) return 1 + 4; /* vfmv.v.f + memory access. */ else @@ -2186,6 +2202,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos) rtx base, step; if (const_vec_series_p (x, &base, &step)) { + EXPECTED_CONST_PATTERN->put (x, RVV_SERIES); /* This cost is not accurate, we will need to adapt the COST accurately according to BASE && STEP. */ return 1; @@ -2216,6 +2233,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos) if (builder.single_step_npatterns_p ()) { + EXPECTED_CONST_PATTERN->put (x, RVV_PATTERN_SINGLE_STEP); if (builder.npatterns_all_equal_p ()) { /* TODO: This cost is not accurate. */ @@ -2229,6 +2247,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos) } else if (builder.interleaved_stepped_npatterns_p ()) { + EXPECTED_CONST_PATTERN->put (x, RVV_PATTERN_INTERLEAVED); /* TODO: This cost is not accurate. */ return 1; } @@ -2256,6 +2275,8 @@ riscv_const_insns (rtx x, bool allow_new_pseudos) /* Our vslide1up/down insn def does not handle HF. */ return 0; + EXPECTED_CONST_PATTERN->put (x, RVV_CATCH_ALL); + /* We already checked for a fully const vector above. Calculate the number of leading/trailing elements covered by the splat. */ int leading_ndups = 1; -- 2.34.1