Hi, in PR115336 we have the following
vect_patt_391 = .MASK_LEN_GATHER_LOAD (_470, vect__59, 1, { 0, ... }, { 0, ... }, _482, 0); vect_iftmp.44 = vect_patt_391 | { 1, ... }; .MASK_LEN_STORE (vectp_f.45, 8B, { -1, ... }, _482, 0, vect_iftmp.44); which assumes that a maskload sets the masked-out elements to zero. RVV does not guarantee this and, unfortunately, leaves it to the hardware implementation to do basically anything it wants (even keep the previous value). In the PR this leads to us reusing a previous vector register and stale, nonzero values causing an invalid result. Based on a Richard Sandiford's feedback I started with the attached patch - it's more an RFC in its current shape and obviously not tested exhaustively. The idea is: - In ifcvt emit a VCOND_MASK (mask, load_result, preferred_else_value) after a MASK_LOAD if the target requires it. - Elide the VCOND_MASK when there is a COND_OP with a replacing else value later. Is this, generally, reasonable or is there a better way? My initial idea was to add an else value to MASK_LOAD. Richard's concern was, though, that this might make nonzero else values appear inexpensive when they are actually not. Even though I, mechanically, added match.pd patterns to catch the most common cases (already at a point where a separate function maybe in gimple-match-exports? would make more sense), there is still significant code-quality fallout. The regressing cases are often of a form where the VCOND_MASK is not just a conversion away but rather hidden behind some unmasked operation. I'm not sure if we could ever recognize everything that way without descending very deep. Regards Robin gcc/ChangeLog: PR middle-end/115336 * config/riscv/riscv.cc (riscv_preferred_else_value): Add MASK_LOAD. * config/riscv/riscv.h (TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO): Set to true. * defaults.h (TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO): New. * doc/tm.texi: Document. * doc/tm.texi.in: Document. * match.pd: Add patterns to allow replacing the else value of a VEC_COND. * tree-if-conv.cc (predicate_load_or_store): Emit a VEC_COND after a MASK_LOAD if the target does not guarantee zeroing. (predicate_statements): Add temporary lhs argument. * tree-ssa-math-opts.cc (convert_mult_to_fma_1): Re-fold fold result. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr115336.c: New test. --- gcc/config/riscv/riscv.cc | 2 + gcc/config/riscv/riscv.h | 2 + gcc/defaults.h | 4 + gcc/doc/tm.texi | 5 + gcc/doc/tm.texi.in | 5 + gcc/match.pd | 125 +++++++++++++++++- .../gcc.target/riscv/rvv/autovec/pr115336.c | 20 +++ gcc/tree-if-conv.cc | 57 +++++++- gcc/tree-ssa-math-opts.cc | 8 ++ 9 files changed, 217 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index c17141d909a..e10bc6824b9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -11481,6 +11481,8 @@ riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops, { if (riscv_v_ext_mode_p (TYPE_MODE (vectype))) return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype)); + else if (ifn == IFN_MASK_LOAD) + return get_or_create_ssa_default_def (cfun, create_tmp_var (vectype)); return default_preferred_else_value (ifn, vectype, nops, ops); } diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 57910eecd3e..dc15eb5e60f 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -1264,4 +1264,6 @@ extern void riscv_remove_unneeded_save_restore_calls (void); /* Check TLS Descriptors mechanism is selected. */ #define TARGET_TLSDESC (riscv_tls_dialect == TLS_DESCRIPTORS) +#define TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO 1 + #endif /* ! GCC_RISCV_H */ diff --git a/gcc/defaults.h b/gcc/defaults.h index 92f3e07f742..6ffbdaea229 100644 --- a/gcc/defaults.h +++ b/gcc/defaults.h @@ -1457,6 +1457,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define DWARF_VERSION_DEFAULT 5 #endif +#ifndef TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO +#define TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO 0 +#endif + #ifndef USED_FOR_TARGET /* Done this way to keep gengtype happy. */ #if BITS_PER_UNIT == 8 diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index be5543b72f8..2fdebe7fd21 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -12683,6 +12683,11 @@ maintainer is familiar with. @end defmac +@defmac TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO +Bla + +@end defmac + @deftypefn {Target Hook} bool TARGET_HAVE_SPECULATION_SAFE_VALUE (bool @var{active}) This hook is used to determine the level of target support for @code{__builtin_speculation_safe_value}. If called with an argument diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 87a7f895174..276c38325dc 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -8097,6 +8097,11 @@ maintainer is familiar with. @end defmac +@defmac TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO +Bla + +@end defmac + @hook TARGET_HAVE_SPECULATION_SAFE_VALUE @hook TARGET_SPECULATION_SAFE_VALUE diff --git a/gcc/match.pd b/gcc/match.pd index 3d0689c9312..2fdc14ef56f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -9744,7 +9744,22 @@ and, #endif /* Detect cases in which a VEC_COND_EXPR effectively replaces the - "else" value of an IFN_COND_*. */ + "else" value of an IFN_COND_* as well as when the IFN_COND_* + replaces the else of the VEC_COND_EXPR. */ +(for cond_op (COND_UNARY) + (simplify + (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 (view_convert:op_type @1) @3))))) + +(for cond_len_op (COND_LEN_UNARY) + (simplify + (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5))))) + (for cond_op (COND_BINARY) (simplify (vec_cond @0 (view_convert? (cond_op @0 @1 @2 @3)) @4) @@ -9756,7 +9771,27 @@ and, (with { tree op_type = TREE_TYPE (@5); } (if (inverse_conditions_p (@0, @2) && element_precision (type) == element_precision (op_type)) - (view_convert (cond_op @2 @3 @4 (view_convert:op_type @1))))))) + (view_convert (cond_op @2 @3 @4 (view_convert:op_type @1)))))) + (simplify + (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 (view_convert:op_type @1) @3 @4)))) + (simplify + (cond_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 @1 (view_convert:op_type @2) @4)))) + (simplify + (cond_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 (convert:op_type @1) @3 @4)))) + (simplify + (cond_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 @1 (convert:op_type @2) @4))))) /* Same for ternary operations. */ (for cond_op (COND_TERNARY) @@ -9770,7 +9805,37 @@ and, (with { tree op_type = TREE_TYPE (@6); } (if (inverse_conditions_p (@0, @2) && element_precision (type) == element_precision (op_type)) - (view_convert (cond_op @2 @3 @4 @5 (view_convert:op_type @1))))))) + (view_convert (cond_op @2 @3 @4 @5 (view_convert:op_type @1)))))) + (simplify + (cond_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 (view_convert:op_type @1) @3 @4 @5)))) + (simplify + (cond_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 @1 (view_convert:op_type @2) @4 @5)))) + (simplify + (cond_op @0 @1 @2 (view_convert? (vec_cond @0 @3 @4)) @5) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 @1 @2 (view_convert:op_type @3) @5)))) + (simplify + (cond_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 (convert:op_type @1) @3 @4 @5)))) + (simplify + (cond_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 @1 (convert:op_type @2) @4 @5)))) + (simplify + (cond_op @0 @1 @2 (convert? (vec_cond @0 @3 @4)) @5) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_op @0 @1 @2 (convert:op_type @3) @5))))) /* Detect cases in which a VEC_COND_EXPR effectively replaces the "else" value of an IFN_COND_LEN_*. */ @@ -9785,7 +9850,27 @@ and, (with { tree op_type = TREE_TYPE (@5); } (if (inverse_conditions_p (@0, @2) && element_precision (type) == element_precision (op_type)) - (view_convert (cond_len_op @2 @3 @4 (view_convert:op_type @1) @6 @7)))))) + (view_convert (cond_len_op @2 @3 @4 (view_convert:op_type @1) @6 @7))))) + (simplify + (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5 @6)))) + (simplify + (cond_len_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5 @6) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 @1 (view_convert:op_type @2) @4 @5 @6)))) + (simplify + (cond_len_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 (convert:op_type @1) @3 @4 @5 @6)))) + (simplify + (cond_len_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5 @6) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 @1 (convert:op_type @2) @4 @5 @6))))) /* Same for ternary operations. */ (for cond_len_op (COND_LEN_TERNARY) @@ -9799,7 +9884,37 @@ and, (with { tree op_type = TREE_TYPE (@6); } (if (inverse_conditions_p (@0, @2) && element_precision (type) == element_precision (op_type)) - (view_convert (cond_len_op @2 @3 @4 @5 (view_convert:op_type @1) @7 @8)))))) + (view_convert (cond_len_op @2 @3 @4 @5 (view_convert:op_type @1) @7 @8))))) + (simplify + (cond_len_op @0 (view_convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6 @7) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 (view_convert:op_type @1) @3 @4 @5 @6 @7)))) + (simplify + (cond_len_op @0 @1 (view_convert? (vec_cond @0 @2 @3)) @4 @5 @6 @7) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 @1 (view_convert:op_type @2) @4 @5 @6 @7)))) + (simplify + (cond_len_op @0 @1 @2 (view_convert? (vec_cond @0 @3 @4)) @5 @6 @7) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 @1 @2 (view_convert:op_type @3) @5 @6 @7)))) + (simplify + (cond_len_op @0 (convert? (vec_cond @0 @1 @2)) @3 @4 @5 @6 @7) + (with { tree op_type = TREE_TYPE (@3); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 (convert:op_type @1) @3 @4 @5 @6 @7)))) + (simplify + (cond_len_op @0 @1 (convert? (vec_cond @0 @2 @3)) @4 @5 @6 @7) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 @1 (convert:op_type @2) @4 @5 @6 @7)))) + (simplify + (cond_len_op @0 @1 @2 (convert? (vec_cond @0 @3 @4)) @5 @6 @7) + (with { tree op_type = TREE_TYPE (@1); } + (if (element_precision (type) == element_precision (op_type)) + (cond_len_op @0 @1 @2 (convert:op_type @3) @5 @6 @7))))) /* Detect simplication for a conditional reduction where diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c new file mode 100644 index 00000000000..29e55705a7a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115336.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options { -O3 -march=rv64gcv_zvl256b -mabi=lp64d } } */ + +short d[19]; +_Bool e[100][19][19]; +_Bool f[10000]; + +int main() +{ + for (long g = 0; g < 19; ++g) + d[g] = 3; + _Bool(*h)[19][19] = e; + for (short g = 0; g < 9; g++) + for (int i = 4; i < 16; i += 3) + f[i * 9 + g] = d[i] ? d[i] : h[g][i][2]; + for (long i = 120; i < 122; ++i) + __builtin_printf("%d\n", f[i]); +} + +/* { dg-final { scan-assembler-times {vmv.v.i\s*v[0-9]+,0} 3 } } */ diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 57992b6deca..c0c7013c817 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -124,6 +124,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-vectorizer.h" #include "tree-eh.h" #include "cgraph.h" +#include "tree-dfa.h" /* For lang_hooks.types.type_for_mode. */ #include "langhooks.h" @@ -2453,11 +2454,14 @@ mask_exists (int size, const vec<int> &vec) that does so. */ static gimple * -predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask) +predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask, + tree override_lhs = NULL_TREE) { gcall *new_stmt; - tree lhs = gimple_assign_lhs (stmt); + tree lhs = override_lhs; + if (!lhs) + lhs = gimple_assign_lhs (stmt); tree rhs = gimple_assign_rhs1 (stmt); tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs; mark_addressable (ref); @@ -2789,11 +2793,52 @@ predicate_statements (loop_p loop) vect_masks.safe_push (mask); } if (gimple_assign_single_p (stmt)) - new_stmt = predicate_load_or_store (&gsi, stmt, mask); - else - new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names); + { + bool target_has_else + = TARGET_MASK_LOAD_MASKED_MAYBE_NONZERO; + tree lhs = gimple_get_lhs (stmt); + + bool is_load = TREE_CODE (lhs) == SSA_NAME; + + gimple_seq stmts2 = NULL; + tree tmplhs = is_load && target_has_else + ? make_temp_ssa_name (TREE_TYPE (lhs), NULL, + "_ifc_") : lhs; + gimple *call_stmt + = predicate_load_or_store (&gsi, stmt, mask, tmplhs); + + gimple_seq_add_stmt (&stmts2, call_stmt); + + if (lhs != tmplhs) + ssa_names.add (tmplhs); - gsi_replace (&gsi, new_stmt, true); + if (is_load && target_has_else) + { + unsigned nops = gimple_call_num_args (call_stmt); + tree *ops = XALLOCAVEC (tree, nops); + + for (unsigned i = 0; i < nops; i++) + ops[i] = gimple_call_arg (call_stmt, i); + + tree els_operand + = targetm.preferred_else_value (IFN_MASK_LOAD, + TREE_TYPE (lhs), + nops, ops); + + tree rhs + = fold_build_cond_expr (TREE_TYPE (tmplhs), + mask, tmplhs, els_operand); + gassign *cond_stmt + = gimple_build_assign (gimple_get_lhs (stmt), rhs); + gimple_seq_add_stmt (&stmts2, cond_stmt); + } + gsi_replace_with_seq (&gsi, stmts2, true); + } + else + { + new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names); + gsi_replace (&gsi, new_stmt, true); + } } else if (((lhs = gimple_assign_lhs (stmt)), true) && (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 57085488722..65e6f91fe8b 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -3193,6 +3193,14 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2) /* Follow all SSA edges so that we generate FMS, FNMA and FNMS regardless of where the negation occurs. */ gimple *orig_stmt = gsi_stmt (gsi); + if (fold_stmt (&gsi, follow_all_ssa_edges)) + { + if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi))) + gcc_unreachable (); + update_stmt (gsi_stmt (gsi)); + } + /* Fold the result again. */ + orig_stmt = gsi_stmt (gsi); if (fold_stmt (&gsi, follow_all_ssa_edges)) { if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi))) -- 2.45.2