[gcc r14-9344] RISC-V: Adjust vec unit-stride load/store costs.
https://gcc.gnu.org/g:9ae83078fe45d093bbaa02b8348f2407fe0c62d6 commit r14-9344-g9ae83078fe45d093bbaa02b8348f2407fe0c62d6 Author: Robin Dapp Date: Mon Jan 15 17:34:58 2024 +0100 RISC-V: Adjust vec unit-stride load/store costs. Scalar loads provide offset addressing while unit-stride vector instructions cannot. The offset must be loaded into a general-purpose register before it can be used. In order to account for this, this patch adds an address arithmetic heuristic that keeps track of data reference operands. If we haven't seen the operand before we add the cost of a scalar statement. This helps to get rid of an lbm regression when vectorizing (roughly 0.5% fewer dynamic instructions). gcc5 improves by 0.2% and deepsjeng by 0.25%. wrf and nab degrade by 0.1%. This is because we now adjust the cost of SLP as well as loop-vectorized instructions whereas we would only adjust loop-vectorized instructions before. Considering higher scalar_to_vec costs (3 vs 1) for all vectorization types causes some snippets not to get vectorized anymore. Given these costs the decision looks correct but appears worse when just counting dynamic instructions. In total SPECint 2017 has 4 billion fewer dynamic instructions and SPECfp 0.7 billion fewer. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (adjust_stmt_cost): Move... (costs::adjust_stmt_cost): ... to here and add vec_load/vec_store offset handling. (costs::add_stmt_cost): Also adjust cost for statements without stmt_info. * config/riscv/riscv-vector-costs.h: Define zero constant. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/vse-slp-1.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vse-slp-2.c: New test. 
Diff: --- gcc/config/riscv/riscv-vector-costs.cc | 86 +++--- gcc/config/riscv/riscv-vector-costs.h | 10 +++ .../gcc.dg/vect/costmodel/riscv/rvv/vse-slp-1.c| 51 + .../gcc.dg/vect/costmodel/riscv/rvv/vse-slp-2.c| 51 + 4 files changed, 188 insertions(+), 10 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 7c9840df4e9..adf9c197df5 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "backend.h" #include "tree-data-ref.h" #include "tree-ssa-loop-niter.h" +#include "tree-hash-traits.h" /* This file should be included last. */ #include "riscv-vector-costs.h" @@ -1047,18 +1048,81 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const top of riscv_builtin_vectorization_cost handling which doesn't have any information on statement operation codes etc. */ -static unsigned -adjust_stmt_cost (enum vect_cost_for_stmt kind, tree vectype, int stmt_cost) +unsigned +costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, +stmt_vec_info stmt_info, +slp_tree, tree vectype, int stmt_cost) { const cpu_vector_cost *costs = get_vector_costs (); switch (kind) { case scalar_to_vec: - return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR - : costs->regmove->GR2VR); + stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR + : costs->regmove->GR2VR); + break; case vec_to_scalar: - return stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR - : costs->regmove->VR2GR); + stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR + : costs->regmove->VR2GR); + break; +case vector_load: +case vector_store: + { + /* Unit-stride vector loads and stores do not have offset addressing +as opposed to scalar loads and stores. +If the address depends on a variable we need an additional +add/sub for each load/store in the worst case. 
*/ + if (stmt_info && stmt_info->stmt) + { + data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + class loop *father = stmt_info->stmt->bb->loop_father; + if (!loop && father && !father->inner && father->superloops) + { + tree ref; + if (TREE_CODE (dr->ref) != MEM_REF + || !(ref = TREE_OPERAND (dr->ref, 0)) + || TREE_CODE (ref) != SSA_NAME) + break; + + if (SSA_NAME_IS_DEFAULT_DEF (ref)) + break; + + if (memrefs.contains ({ref, cst0})) + break; + + memrefs.add ({ref, cst0}); + +
[gcc r14-9345] RISC-V: Use vmv1r.v instead of vmv.v.v for fma output reloads [PR114200].
https://gcc.gnu.org/g:59554a50be8ebbd52e8a6348a92110af182e1874 commit r14-9345-g59554a50be8ebbd52e8a6348a92110af182e1874 Author: Robin Dapp Date: Wed Mar 6 12:15:40 2024 +0100 RISC-V: Use vmv1r.v instead of vmv.v.v for fma output reloads [PR114200]. Three-operand instructions like vmacc are modeled with an implicit output reload when the output does not match one of the operands. For this we use vmv.v.v which is subject to length masking. In a situation where the current vl is less than the full vlenb and the fma's result value is used as input for a vector reduction (which is never length masked) we effectively only reduce vl elements. The masked-out elements are relevant for the reduction, though, leading to a wrong result. This patch replaces the vmv reloads by full-register reloads. gcc/ChangeLog: PR target/114200 PR target/114202 * config/riscv/vector.md: Use vmv[1248]r.v instead of vmv.v.v. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr114200.c: New test. * gcc.target/riscv/rvv/autovec/pr114202.c: New test. 
Diff: --- gcc/config/riscv/vector.md | 96 +++--- .../gcc.target/riscv/rvv/autovec/pr114200.c| 18 .../gcc.target/riscv/rvv/autovec/pr114202.c| 20 + 3 files changed, 86 insertions(+), 48 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index f89f9c2fa86..8b1c24c5d79 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -5351,10 +5351,10 @@ "@ vmadd.vv\t%0,%4,%5%p1 vmacc.vv\t%0,%3,%4%p1 - vmv.v.v\t%0,%4\;vmacc.vv\t%0,%3,%4%p1 + vmv%m4r.v\t%0,%4\;vmacc.vv\t%0,%3,%4%p1 vmadd.vv\t%0,%4,%5%p1 vmacc.vv\t%0,%3,%4%p1 - vmv.v.v\t%0,%5\;vmacc.vv\t%0,%3,%4%p1" + vmv%m5r.v\t%0,%5\;vmacc.vv\t%0,%3,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "")]) @@ -5378,9 +5378,9 @@ "TARGET_VECTOR" "@ vmadd.vv\t%0,%3,%4%p1 - vmv.v.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1 + vmv%m2r.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1 vmadd.vv\t%0,%3,%4%p1 - vmv.v.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1" + vmv%m2r.v\t%0,%2\;vmadd.vv\t%0,%3,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "") (set_attr "merge_op_idx" "2") @@ -5409,9 +5409,9 @@ "TARGET_VECTOR" "@ vmacc.vv\t%0,%2,%3%p1 - vmv.v.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1 + vmv%m4r.v\t%0,%4;vmacc.vv\t%0,%2,%3%p1 vmacc.vv\t%0,%2,%3%p1 - vmv.v.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1" + vmv%m4r.v\t%0,%4\;vmacc.vv\t%0,%2,%3%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "") (set_attr "merge_op_idx" "4") @@ -5462,9 +5462,9 @@ "TARGET_VECTOR" "@ vmadd.vx\t%0,%2,%4%p1 - vmv.v.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1 + vmv%m3r.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1 vmadd.vx\t%0,%2,%4%p1 - vmv.v.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1" + vmv%m3r.v\t%0,%3\;vmadd.vx\t%0,%2,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "") (set_attr "merge_op_idx" "3") @@ -5494,9 +5494,9 @@ "TARGET_VECTOR" "@ vmacc.vx\t%0,%2,%3%p1 - vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1 + vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1 vmacc.vx\t%0,%2,%3%p1 - vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1" + vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1" [(set_attr "type" "vimuladd") (set_attr "mode" 
"") (set_attr "merge_op_idx" "4") @@ -5562,9 +5562,9 @@ "TARGET_VECTOR && !TARGET_64BIT" "@ vmadd.vx\t%0,%2,%4%p1 - vmv.v.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1 + vmv%m2r.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1 vmadd.vx\t%0,%2,%4%p1 - vmv.v.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1" + vmv%m2r.v\t%0,%2\;vmadd.vx\t%0,%2,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "") (set_attr "merge_op_idx" "3") @@ -5595,9 +5595,9 @@ "TARGET_VECTOR && !TARGET_64BIT" "@ vmacc.vx\t%0,%2,%3%p1 - vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1 + vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1 vmacc.vx\t%0,%2,%3%p1 - vmv.v.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1" + vmv%m4r.v\t%0,%4\;vmacc.vx\t%0,%2,%3%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "") (set_attr "merge_op_idx" "4") @@ -5649,10 +5649,10 @@ "@ vnmsub.vv\t%0,%4,%5%p1 vnmsac.vv\t%0,%3,%4%p1 - vmv.v.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1 + vmv%m3r.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1 vnmsub.vv\t%0,%4,%5%p1 vnmsac.vv\t%0,%3,%4%p1 - vmv.v.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1" + vmv%m3r.v\t%0,%3\;vnmsub.vv\t%0,%4,%5%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "")]) @@ -5676,9 +5676,9 @@ "TARGET_VECTOR" "@ vnmsub.vv\t%0,%3,%4%p1 - vmv.v.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1 + vmv%m2r.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1 vnmsub.vv\t%0,%3,%4%p1 - vmv.v.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1" + vmv%m2r.v\t%0,%2\;vnmsub.vv\t%0,%3,%4%p1" [(set_attr "type" "vimuladd") (set_attr "mode" "") (set_attr "merge_op_idx" "2") @@ -5707,9 +5707,9
[gcc r14-9366] vect: Do not peel epilogue for partial vectors.
https://gcc.gnu.org/g:226043a4d8fb23c7fe7bf16e485b3cfaa094db21 commit r14-9366-g226043a4d8fb23c7fe7bf16e485b3cfaa094db21 Author: Robin Dapp Date: Wed Mar 6 16:54:35 2024 +0100 vect: Do not peel epilogue for partial vectors. r14-7036-gcbf569486b2dec added an epilogue vectorization guard for early break but PR114196 shows that we also run into the problem without early break. Therefore merge the condition into the topmost vectorization guard. gcc/ChangeLog: PR middle-end/114196 * tree-vect-loop-manip.cc (vect_can_peel_nonlinear_iv_p): Merge vectorization guards. gcc/testsuite/ChangeLog: * gcc.target/aarch64/pr114196.c: New test. * gcc.target/riscv/rvv/autovec/pr114196.c: New test. Diff: --- gcc/testsuite/gcc.target/aarch64/pr114196.c| 19 ++ .../gcc.target/riscv/rvv/autovec/pr114196.c| 19 ++ gcc/tree-vect-loop-manip.cc| 30 +- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/pr114196.c b/gcc/testsuite/gcc.target/aarch64/pr114196.c new file mode 100644 index 000..15e4b0e31b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr114196.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options { -O3 -fno-vect-cost-model -march=armv9-a -msve-vector-bits=256 } } */ + +unsigned a; +int b; +long *c; + +int +main () +{ + for (int d = 0; d < 22; d += 4) { + b = ({ + int e = c[d]; + e; + }) + ? 0 : -c[d]; + a *= 3; + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114196.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114196.c new file mode 100644 index 000..7ba9cbbed70 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114196.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options { -O3 -fno-vect-cost-model -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl } } */ + +unsigned a; +int b; +long *c; + +int +main () +{ + for (int d = 0; d < 22; d += 4) { + b = ({ + int e = c[d]; + e; + }) + ? 
0 : -c[d]; + a *= 3; + } +} diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index f72da915103..56a6d8e4a8d 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -2129,16 +2129,19 @@ vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo, For mult, don't known how to generate init_expr * pow (step, niters) for variable niters. For neg, it should be ok, since niters of vectorized main loop - will always be multiple of 2. */ - if ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()) + will always be multiple of 2. + See also PR113163 and PR114196. */ + if ((!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant () + || LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) + || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) && induction_type != vect_step_op_neg) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Peeling for epilogue is not supported" " for nonlinear induction except neg" -" when iteration count is unknown.\n"); +" when iteration count is unknown or" +" when using partial vectorization.\n"); return false; } @@ -2178,25 +2181,6 @@ vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo, return false; } - /* We can't support partial vectors and early breaks with an induction - type other than add or neg since we require the epilog and can't - perform the peeling. The below condition mirrors that of - vect_gen_vector_loop_niters where niters_vector_mult_vf_var then sets - step_vector to VF rather than 1. This is what creates the nonlinear - IV. PR113163. 
*/ - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant () - && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) - && induction_type != vect_step_op_neg) -{ - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -"Peeling for epilogue is not supported" -" for nonlinear induction except neg" -" when VF is known and early breaks.\n"); - return false; -} - return true; }
[gcc r15-638] internal-fn: Do not force vcond_mask operands to reg.
https://gcc.gnu.org/g:7ca35f2e430081d6ec91e910002f92d9713350fa commit r15-638-g7ca35f2e430081d6ec91e910002f92d9713350fa Author: Robin Dapp Date: Fri May 10 12:44:44 2024 +0200 internal-fn: Do not force vcond_mask operands to reg. In order to directly use constants this patch removes force_regs in the vcond_mask expander. gcc/ChangeLog: PR middle-end/113474 * internal-fn.cc (expand_vec_cond_mask_optab_fn): Remove force_regs. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr113474.c: New test. Diff: --- gcc/internal-fn.cc| 3 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c | 13 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 73045ca8c8c1..9c09026793fa 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -3165,9 +3165,6 @@ expand_vec_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) rtx_op1 = expand_normal (op1); rtx_op2 = expand_normal (op2); - mask = force_reg (mask_mode, mask); - rtx_op1 = force_reg (mode, rtx_op1); - rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); create_output_operand (&ops[0], target, mode); create_input_operand (&ops[1], rtx_op1, mode); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c new file mode 100644 index ..0364bf9f5e38 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target riscv_v } } */ +/* { dg-additional-options "-std=c99" } */ + +void +foo (int n, int **a) +{ + int b; + for (b = 0; b < n; b++) +for (long e = 8; e > 0; e--) + a[b][e] = a[b][e] == 15; +} + +/* { dg-final { scan-assembler "vmerge.vim" } } */
[gcc r15-639] RISC-V: Add initial cost handling for segment loads/stores.
https://gcc.gnu.org/g:e0b9c8ad7098fb08a25a61fe17d4274dd73e5145 commit r15-639-ge0b9c8ad7098fb08a25a61fe17d4274dd73e5145 Author: Robin Dapp Date: Mon Feb 26 13:09:15 2024 +0100 RISC-V: Add initial cost handling for segment loads/stores. This patch makes segment loads and stores more expensive. It adds the cost fields segment_permute_2 through segment_permute_8 to the common vector costs and adds handling to adjust_stmt_cost. gcc/ChangeLog: * config/riscv/riscv-protos.h (struct common_vector_cost): Add segment_permute cost. * config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost): Handle segment loads/stores. * config/riscv/riscv.cc: Initialize segment_permute_[2-8] to 1. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c: Adjust test. Diff: --- gcc/config/riscv/riscv-protos.h| 9 ++ gcc/config/riscv/riscv-vector-costs.cc | 163 +++-- gcc/config/riscv/riscv.cc | 14 ++ .../gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c | 4 +- 4 files changed, 146 insertions(+), 44 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 565ead1382a7..004ceb1031b8 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -222,6 +222,15 @@ struct common_vector_cost const int gather_load_cost; const int scatter_store_cost; + /* Segment load/store permute cost. */ + const int segment_permute_2; + const int segment_permute_3; + const int segment_permute_4; + const int segment_permute_5; + const int segment_permute_6; + const int segment_permute_7; + const int segment_permute_8; + /* Cost of a vector-to-scalar operation. 
*/ const int vec_to_scalar_cost; diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 4582b0db4250..0a88e142a934 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -1052,6 +1052,25 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const return vector_costs::better_main_loop_than_p (other); } +/* Returns the group size i.e. the number of vectors to be loaded by a + segmented load/store instruction. Return 0 if it is no segmented + load/store. */ +static int +segment_loadstore_group_size (enum vect_cost_for_stmt kind, + stmt_vec_info stmt_info) +{ + if (stmt_info + && (kind == vector_load || kind == vector_store) + && STMT_VINFO_DATA_REF (stmt_info)) +{ + stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); + if (stmt_info + && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES) + return DR_GROUP_SIZE (stmt_info); +} + return 0; +} + /* Adjust vectorization cost after calling riscv_builtin_vectorization_cost. For some statement, we would like to further fine-grain tweak the cost on top of riscv_builtin_vectorization_cost handling which doesn't have any @@ -1076,55 +1095,115 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, case vector_load: case vector_store: { - /* Unit-stride vector loads and stores do not have offset addressing -as opposed to scalar loads and stores. -If the address depends on a variable we need an additional -add/sub for each load/store in the worst case. */ - if (stmt_info && stmt_info->stmt) + if (stmt_info && stmt_info->stmt && STMT_VINFO_DATA_REF (stmt_info)) { - data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); - class loop *father = stmt_info->stmt->bb->loop_father; - if (!loop && father && !father->inner && father->superloops) + /* Segment loads and stores. When the group size is > 1 +the vectorizer will add a vector load/store statement for +each vector in the group. 
Here we additionally add permute +costs for each. */ + /* TODO: Indexed and ordered/unordered cost. */ + int group_size = segment_loadstore_group_size (kind, stmt_info); + if (group_size > 1) + { + switch (group_size) + { + case 2: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_2; + else + stmt_cost += costs->vls->segment_permute_2; + break; + case 3: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_3; + else + stmt_cost += costs->vls->segment_permute_3; + brea
[gcc r15-3119] RISC-V: Expand vec abs without masking.
https://gcc.gnu.org/g:c22d57cdc52d990eb7d353fa82c67882bc824d40 commit r15-3119-gc22d57cdc52d990eb7d353fa82c67882bc824d40 Author: Robin Dapp Date: Fri Aug 9 15:05:39 2024 +0200 RISC-V: Expand vec abs without masking. Standard abs synthesis during expand is max (a, -a). This expansion has the advantage of avoiding masking and is thus potentially faster than the a < 0 ? -a : a synthesis. gcc/ChangeLog: * config/riscv/autovec.md (abs2): Expand via max (a, -a). gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/abs-rv32gcv.c: Adjust test expectation. * gcc.target/riscv/rvv/autovec/unop/abs-rv64gcv.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/abs-2.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_unary-2.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_unary-3.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_unary-4.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_unary-5.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_unary-6.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_unary-7.c: Ditto. * gcc.target/riscv/rvv/autovec/cond/cond_unary-8.c: Ditto. 
Diff: --- gcc/config/riscv/autovec.md| 26 +++--- .../riscv/rvv/autovec/cond/cond_unary-1.c | 6 +++-- .../riscv/rvv/autovec/cond/cond_unary-2.c | 6 +++-- .../riscv/rvv/autovec/cond/cond_unary-3.c | 6 +++-- .../riscv/rvv/autovec/cond/cond_unary-4.c | 6 +++-- .../riscv/rvv/autovec/cond/cond_unary-5.c | 6 +++-- .../riscv/rvv/autovec/cond/cond_unary-6.c | 6 +++-- .../riscv/rvv/autovec/cond/cond_unary-7.c | 6 +++-- .../riscv/rvv/autovec/cond/cond_unary-8.c | 6 +++-- .../riscv/rvv/autovec/unop/abs-rv32gcv.c | 6 ++--- .../riscv/rvv/autovec/unop/abs-rv64gcv.c | 6 ++--- .../gcc.target/riscv/rvv/autovec/vls/abs-2.c | 2 +- 12 files changed, 47 insertions(+), 41 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index decfe2bf8cc8..4decaedbd826 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1073,29 +1073,19 @@ [(set_attr "type" "vialu")]) ;; --- -;; - [INT] ABS expansion to vmslt and vneg. +;; - [INT] ABS expansion to vneg and vmax. ;; --- -(define_insn_and_split "abs2" +(define_expand "abs2" [(set (match_operand:V_VLSI 0 "register_operand") - (abs:V_VLSI - (match_operand:V_VLSI 1 "register_operand")))] - "TARGET_VECTOR && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] +(smax:V_VLSI + (match_dup 0) + (neg:V_VLSI + (match_operand:V_VLSI 1 "register_operand"] + "TARGET_VECTOR" { - rtx zero = gen_const_vec_duplicate (mode, GEN_INT (0)); - machine_mode mask_mode = riscv_vector::get_mask_mode (mode); - rtx mask = gen_reg_rtx (mask_mode); - riscv_vector::expand_vec_cmp (mask, LT, operands[1], zero); - - rtx ops[] = {operands[0], mask, operands[1], operands[1]}; - riscv_vector::emit_vlmax_insn (code_for_pred (NEG, mode), - riscv_vector::UNARY_OP_TAMU, ops); DONE; -} -[(set_attr "type" "vector")]) +}) ;; --- ;; [FP] Unary operations diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c index 2233c6eeecb9..4866b221ca4a 100644 
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-1.c @@ -36,8 +36,10 @@ TEST_ALL (DEF_LOOP) -/* NOTE: int abs operator is converted to vmslt + vneg.v */ -/* { dg-final { scan-assembler-times {\tvneg\.v\tv[0-9]+,v[0-9]+,v0\.t} 8 } } */ +/* NOTE: int abs operator is converted to vneg.v + vmax.vv */ +/* { dg-final { scan-assembler-times {\tvneg\.v\tv[0-9]+,v[0-9]+} 8 } } */ +/* { dg-final { scan-assembler-times {\tvmax\.vv\tv[0-9]+,v[0-9]+,v[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {\tvneg\.v\tv[0-9]+,v[0-9]+,v0\.t} 4 } } */ /* { dg-final { scan-assembler-times {\tvnot\.v\tv[0-9]+,v[0-9]+,v0\.t} 4 } } */ /* { dg-final { scan-assembler-times {\tvfabs\.v\tv[0-9]+,v[0-9]+,v0\.t} 3 } } */ /* { dg-final { scan-assembler-times {\tvfneg\.v\tv[0-9]+,v[0-9]+,v0\.t} 3 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-2.c index 4886bff67d86..651df9f86461 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_unary-2.c +++ b/gcc/testsu
[gcc r15-3120] optabs-query: Use opt_machine_mode for smallest_int_mode_for_size [PR115495].
https://gcc.gnu.org/g:96fe95bac67c7303dc811c04f5e99cc959a7182a commit r15-3120-g96fe95bac67c7303dc811c04f5e99cc959a7182a Author: Robin Dapp Date: Tue Aug 20 14:02:09 2024 +0200 optabs-query: Use opt_machine_mode for smallest_int_mode_for_size [PR115495]. In get_best_extraction_insn we use smallest_int_mode_for_size with struct_bits as size argument. PR115495 has struct_bits = 256 and we don't have a mode for that. This patch makes smallest_mode_for_size and smallest_int_mode_for_size return opt modes so we can just skip over the loop when there is no mode. PR middle-end/115495 gcc/ChangeLog: * cfgexpand.cc (expand_debug_expr): Require mode. * combine.cc (make_extraction): Ditto. * config/aarch64/aarch64.cc (aarch64_expand_cpymem): Ditto. (aarch64_expand_setmem): Ditto. * config/arc/arc.cc (arc_expand_cpymem): Ditto. * config/arm/arm.cc (arm_expand_divmod_libfunc): Ditto. * config/i386/i386.cc (ix86_get_mask_mode): Ditto. * config/rs6000/predicates.md: Ditto. * config/rs6000/rs6000.cc (vspltis_constant): Ditto. * config/s390/s390.cc (s390_expand_insv): Ditto. * config/sparc/sparc.cc (assign_int_registers): Ditto. * coverage.cc (get_gcov_type): Ditto. (get_gcov_unsigned_t): Ditto. * dse.cc (find_shift_sequence): Ditto. * expmed.cc (store_integral_bit_field): Ditto. * expr.cc (convert_mode_scalar): Ditto. (op_by_pieces_d::smallest_fixed_size_mode_for_size): Ditto. (emit_block_move_via_oriented_loop): Ditto. (copy_blkmode_to_reg): Ditto. (store_field): Ditto. * internal-fn.cc (expand_arith_overflow): Ditto. * machmode.h (HAVE_MACHINE_MODES): Ditto. (smallest_mode_for_size): Use opt_machine_mode. (smallest_int_mode_for_size): Use opt_scalar_int_mode. * optabs-query.cc (get_best_extraction_insn): Require mode. * optabs.cc (expand_twoval_binop_libfunc): Ditto. * stor-layout.cc (smallest_mode_for_size): Return opt_machine_mode. (layout_type): Require mode. (initialize_sizetypes): Ditto. * tree-ssa-loop-manip.cc (canonicalize_loop_ivs): Ditto. 
gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr115495.c: New test. gcc/ada/ChangeLog: * gcc-interface/utils2.cc (fast_modulo_reduction): Require mode. (nonbinary_modular_operation): Ditto. Diff: --- gcc/ada/gcc-interface/utils2.cc | 5 +++-- gcc/cfgexpand.cc | 2 +- gcc/combine.cc| 2 +- gcc/config/aarch64/aarch64.cc | 6 -- gcc/config/arc/arc.cc | 2 +- gcc/config/arm/arm.cc | 2 +- gcc/config/i386/i386.cc | 4 ++-- gcc/config/rs6000/predicates.md | 2 +- gcc/config/rs6000/rs6000.cc | 2 +- gcc/config/s390/s390.cc | 2 +- gcc/config/sparc/sparc.cc | 2 +- gcc/coverage.cc | 5 +++-- gcc/dse.cc| 3 ++- gcc/expmed.cc | 3 ++- gcc/expr.cc | 17 + gcc/internal-fn.cc| 4 ++-- gcc/machmode.h| 10 +- gcc/optabs-query.cc | 1 + gcc/optabs.cc | 3 ++- gcc/stor-layout.cc| 16 +--- gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115495.c | 9 + gcc/tree-ssa-loop-manip.cc| 2 +- 22 files changed, 62 insertions(+), 42 deletions(-) diff --git a/gcc/ada/gcc-interface/utils2.cc b/gcc/ada/gcc-interface/utils2.cc index 0d7e03ec6b07..8eebf5935960 100644 --- a/gcc/ada/gcc-interface/utils2.cc +++ b/gcc/ada/gcc-interface/utils2.cc @@ -661,7 +661,7 @@ fast_modulo_reduction (tree op, tree modulus, unsigned int precision) if (type_precision < BITS_PER_WORD) { const scalar_int_mode m - = smallest_int_mode_for_size (type_precision + 1); + = smallest_int_mode_for_size (type_precision + 1).require (); tree new_type = gnat_type_for_mode (m, 1); op = fold_convert (new_type, op); modulus = fold_convert (new_type, modulus); @@ -721,7 +721,8 @@ nonbinary_modular_operation (enum tree_code op_code, tree type, tree lhs, for its mode since ope
[gcc r15-3282] RISC-V: Fix subreg of VLS modes larger than a vector [PR116086].
https://gcc.gnu.org/g:4ff4875a79ccb302dc2401c32fe0af2187b61b99 commit r15-3282-g4ff4875a79ccb302dc2401c32fe0af2187b61b99 Author: Robin Dapp Date: Tue Aug 27 10:25:34 2024 +0200 RISC-V: Fix subreg of VLS modes larger than a vector [PR116086]. When the source mode is potentially larger than one vector (e.g. an LMUL2 mode for VLEN=128) we don't know which vector the subreg actually refers to. For zvl128b and LMUL=2 the subreg in (subreg:V2DI (reg:V4DI)) could actually be a full (high) vector register of a two-register group (at VLEN=128) or the higher part of a single register (at VLEN>128). As the subreg is statically ambiguous we prevent such situations in can_change_mode_class. The culprit in PR116086 is _12 = BIT_FIELD_REF ; which can be expanded with a vector-vector extract (from V4DI to V2DI). This patch adds a VLS-mode vector-vector extract that handles "halving" cases like this one by sliding down the source vector, thus making sure the correct part is used. PR target/116086 gcc/ChangeLog: * config/riscv/autovec.md (vec_extract): Add vector-vector extract for VLS modes. * config/riscv/riscv.cc (riscv_can_change_mode_class): Forbid VLS modes larger than one vector. * config/riscv/vector-iterators.md: Add vector-vector extract iterators. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Add effective target checks for zvl256b and zvl512b. * gcc.target/riscv/rvv/autovec/pr116086-2-run.c: New test. * gcc.target/riscv/rvv/autovec/pr116086-2.c: New test. * gcc.target/riscv/rvv/autovec/pr116086.c: New test. 
Diff: --- gcc/config/riscv/autovec.md| 35 gcc/config/riscv/riscv.cc | 11 ++ gcc/config/riscv/vector-iterators.md | 202 + .../gcc.target/riscv/rvv/autovec/pr116086-2-run.c | 6 + .../gcc.target/riscv/rvv/autovec/pr116086-2.c | 18 ++ .../gcc.target/riscv/rvv/autovec/pr116086.c| 76 gcc/testsuite/lib/target-supports.exp | 37 7 files changed, 385 insertions(+) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 4decaedbd826..a4e108268b44 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1453,6 +1453,41 @@ DONE; }) +;; - +;; [INT,FP] Extract a vector from a vector. +;; - +;; TODO: This can be extended to allow basically any extract mode. +;; For now this helps optimize VLS subregs like (subreg:V2DI (reg:V4DI) 16) +;; that would otherwise need to go via memory. + +(define_expand "vec_extract" + [(set (match_operand: 0 "nonimmediate_operand") + (vec_select: + (match_operand:VLS_HAS_HALF 1 "register_operand") + (parallel +[(match_operand 2 "immediate_operand")])))] + "TARGET_VECTOR" +{ + int sz = GET_MODE_NUNITS (mode).to_constant (); + int part = INTVAL (operands[2]); + + rtx start = GEN_INT (part * sz); + rtx tmp = operands[1]; + + if (part != 0) +{ + tmp = gen_reg_rtx (mode); + + rtx ops[] = {tmp, operands[1], start}; + riscv_vector::emit_vlmax_insn + (code_for_pred_slide (UNSPEC_VSLIDEDOWN, mode), +riscv_vector::BINARY_OP, ops); +} + + emit_move_insn (operands[0], gen_lowpart (mode, tmp)); + DONE; +}) + ;; - ;; [FP] Binary operations ;; - diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index e9b1b9bc3add..3f5dfb838425 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10670,6 +10670,17 @@ riscv_can_change_mode_class (machine_mode from, machine_mode to, if (reg_classes_intersect_p (V_REGS, rclass) && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to))) return false; + + /* Subregs of modes larger than one vector are ambiguous. 
+ A V4DImode with rv64gcv_zvl128b could, for example, span two registers/one + register group of two at VLEN = 128 or one register at VLEN >= 256 and + we cannot, statically, determine which part of it to extract. + Therefore prevent that. */ + if (reg_classes_intersect_p (V_REGS, rclass) + && riscv_v_ext_vls_mode_p (from) + && !ordered_p (BITS_PER_RISCV_VECTOR, GET_MODE_PRECISION (from))) + return false; + return !reg_classes_intersect_p (FP_REGS, rclass); } diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index cbbd248c9bb3..a00b5c3feddd 100644 --- a/gcc/config/riscv/vector-iterat
[gcc r15-1861] RISC-V: Use tu policy for first-element vec_set [PR115725].
https://gcc.gnu.org/g:acc3b703c05debc6276451f9daae5d0ffc797eac commit r15-1861-gacc3b703c05debc6276451f9daae5d0ffc797eac Author: Robin Dapp Date: Mon Jul 1 13:37:17 2024 +0200 RISC-V: Use tu policy for first-element vec_set [PR115725]. This patch changes the tail policy for vmv.s.x from ta to tu. By default the bug does not show up with qemu because qemu's current vmv.s.x implementation always uses the tail-undisturbed policy. With a local qemu version that overwrites the tail with ones when the tail-agnostic policy is specified, the bug shows. gcc/ChangeLog: * config/riscv/autovec.md: Add TU policy. * config/riscv/riscv-protos.h (enum insn_type): Define SCALAR_MOVE_MERGED_OP_TU. gcc/testsuite/ChangeLog: PR target/115725 * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Adjust test expectation. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Ditto. Diff: --- gcc/config/riscv/autovec.md | 3 ++- gcc/config/riscv/riscv-protos.h | 4 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c | 12 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c | 12 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c | 12 .../gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c | 12 6 files changed, 22 insertions(+), 33 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 66d70f678a6..0fb6316a2cf 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1341,7 +1341,8 @@ { rtx ops[] = {operands[0], operands[0], operands[1]}; riscv_vector::emit_nonvlmax_insn (code_for_pred_broadcast (mode), - riscv_vector::SCALAR_MOVE_MERGED_OP, ops, CONST1_RTX (Pmode)); + riscv_vector::SCALAR_MOVE_MERGED_OP_TU, + ops, CONST1_RTX (Pmode)); } else { diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index a8b76173fa0..abf6e34b5cc 100644 --- a/gcc/config/riscv/riscv-protos.h +++ 
b/gcc/config/riscv/riscv-protos.h @@ -524,6 +524,10 @@ enum insn_type : unsigned int SCALAR_MOVE_MERGED_OP = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P | HAS_MERGE_P | TDEFAULT_POLICY_P | MDEFAULT_POLICY_P | UNARY_OP_P, + + SCALAR_MOVE_MERGED_OP_TU = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P + | HAS_MERGE_P | TU_POLICY_P | MDEFAULT_POLICY_P + | UNARY_OP_P, }; enum vlmul_type diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c index ecb160933d6..99b0f625c83 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c @@ -64,14 +64,10 @@ typedef double vnx2df __attribute__((vector_size (16))); TEST_ALL1 (VEC_SET) TEST_ALL_VAR1 (VEC_SET_VAR1) -/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */ -/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*tu,\s*ma} 5 } } */ -/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*ta,\s*ma} 2 } } */ -/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*tu,\s*ma} 6 } } */ -/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*ta,\s*ma} 2 } } */ -/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*tu,\s*ma} 6 } } */ -/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*ta,\s*ma} 2 } } */ -/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*tu,\s*ma} 4 } } */ +/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*tu,\s*ma} 6 } } */ +/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*tu,\s*ma} 8 } } */ +/* { dg-final { scan-assembler-times {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*tu,\s*ma} 8 } } */ +/* { dg-final { scan-assembler-times 
{vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*tu,\s*ma} 6 } } */ /* { dg-final { scan-assembler-times {\tvmv.v.x} 13 } } */ /* { dg-final { scan-assembler-times {\tvfmv.v.f} 8 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c index 194abff77cc..64a40308eb1 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c +++ b/
[gcc r15-2300] RISC-V: Allow LICM hoist POLY_INT configuration code sequence
https://gcc.gnu.org/g:4cbbce045681c234387d8d56376ea179dc869229 commit r15-2300-g4cbbce045681c234387d8d56376ea179dc869229 Author: Juzhe-Zhong Date: Thu Feb 1 23:45:50 2024 +0800 RISC-V: Allow LICM hoist POLY_INT configuration code sequence Noticed in a recent benchmark evaluation (coremark-pro zip-test): vid.v v2 vmv.v.i v5,0 .L9: vle16.v v3,0(a4) vrsub.vx v4,v2,a6 ---> LICM failed to hoist it outside the loop. The root cause is: (insn 56 47 57 4 (set (subreg:DI (reg:HI 220) 0) (reg:DI 223)) "rvv.c":11:9 208 {*movdi_64bit} -> Its result is used by the following vrsub.vx, which then suppresses the hoist of the vrsub.vx (nil)) (insn 57 56 59 4 (set (reg:RVVMF2HI 216) (if_then_else:RVVMF2HI (unspec:RVVMF32BI [ (const_vector:RVVMF32BI repeat [ (const_int 1 [0x1]) ]) (reg:DI 350) (const_int 2 [0x2]) repeated x2 (const_int 1 [0x1]) (reg:SI 66 vl) (reg:SI 67 vtype) ] UNSPEC_VPREDICATE) (minus:RVVMF2HI (vec_duplicate:RVVMF2HI (reg:HI 220)) (reg:RVVMF2HI 217)) (unspec:RVVMF2HI [ (reg:DI 0 zero) ] UNSPEC_VUNDEF))) "rvv.c":11:9 6938 {pred_subrvvmf2hi_reverse_scalar} (expr_list:REG_DEAD (reg:HI 220) (nil))) This patch fixes it by generating (set (reg:HI) (subreg:HI (reg:DI))) instead of (set (subreg:DI (reg:DI)) (reg:DI)). After this patch: vid.v v2 vrsub.vx v2,v2,a7 vmv.v.i v4,0 .L3: vle16.v v3,0(a4) Tested on both RV32 and RV64 with no regressions. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_legitimize_move): Fix poly_int dest generation. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/poly_licm-1.c: New test. * gcc.target/riscv/rvv/autovec/poly_licm-2.c: New test. * gcc.target/riscv/rvv/autovec/poly_licm-3.c: New test. 
Diff: --- gcc/config/riscv/riscv.cc | 9 .../gcc.target/riscv/rvv/autovec/poly_licm-1.c | 18 +++ .../gcc.target/riscv/rvv/autovec/poly_licm-2.c | 27 ++ .../gcc.target/riscv/rvv/autovec/poly_licm-3.c | 26 + 4 files changed, 76 insertions(+), 4 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 1696fa296482..96c4ab65 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3230,16 +3230,17 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) (const_poly_int:HI [m, n]) (const_poly_int:SI [m, n]). */ rtx tmp = gen_reg_rtx (Pmode); - riscv_legitimize_poly_move (Pmode, gen_lowpart (Pmode, dest), tmp, - src); + rtx tmp2 = gen_reg_rtx (Pmode); + riscv_legitimize_poly_move (Pmode, tmp2, tmp, src); + emit_move_insn (dest, gen_lowpart (mode, tmp2)); } else { /* In RV32 system, handle (const_poly_int:SI [m, n]) (const_poly_int:DI [m, n]). In RV64 system, handle (const_poly_int:DI [m, n]). - FIXME: Maybe we could gen SImode in RV32 and then sign-extend to DImode, - the offset should not exceed 4GiB in general. */ +FIXME: Maybe we could gen SImode in RV32 and then sign-extend to +DImode, the offset should not exceed 4GiB in general. */ rtx tmp = gen_reg_rtx (mode); riscv_legitimize_poly_move (mode, dest, tmp, src); } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c new file mode 100644 index ..b7da65f09964 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2" } */ + +extern int wsize; + +typedef unsigned short Posf; +#define NIL 0 + +void foo (Posf *p) +{ + register unsigned n, m; + do { + m = *--p; + *p = (Posf)(m >= wsize ? 
m-wsize : NIL); + } while (--n); +} + +/* { dg-final { scan-assembler-times {vid\.v\s+v[0-9]+\s+addi\s+\s*[a-x0-9]+,\s*[a-x0-9]+,\s*-1\s+vrsub\.vx\s+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c new file mode 100644 index ..ffb3c63149f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/poly_licm-2.c @@ -0,0 +1,27 @@ +/
[gcc r15-2301] RISC-V: Error early with V and no M extension.
https://gcc.gnu.org/g:e589ffb6d78881572ddea21df0d9b6c2641d574d commit r15-2301-ge589ffb6d78881572ddea21df0d9b6c2641d574d Author: Robin Dapp Date: Wed Jul 24 09:08:00 2024 +0200 RISC-V: Error early with V and no M extension. For calculating the value of a poly_int at runtime we use a multiplication instruction that requires the M extension. Instead of just asserting and ICEing this patch emits an early error at option-parsing time. gcc/ChangeLog: PR target/116036 * config/riscv/riscv.cc (riscv_override_options_internal): Error with TARGET_VECTOR && !TARGET_MUL. gcc/testsuite/ChangeLog: * gcc.target/riscv/arch-31.c: Add m to arch string and expect it. * gcc.target/riscv/arch-32.c: Ditto. * gcc.target/riscv/arch-37.c: Ditto. * gcc.target/riscv/arch-38.c: Ditto. * gcc.target/riscv/predef-14.c: Ditto. * gcc.target/riscv/predef-15.c: Ditto. * gcc.target/riscv/predef-16.c: Ditto. * gcc.target/riscv/predef-26.c: Ditto. * gcc.target/riscv/predef-27.c: Ditto. * gcc.target/riscv/predef-32.c: Ditto. * gcc.target/riscv/predef-33.c: Ditto. * gcc.target/riscv/predef-36.c: Ditto. * gcc.target/riscv/predef-37.c: Ditto. * gcc.target/riscv/rvv/autovec/pr111486.c: Add m to arch string. * gcc.target/riscv/compare-debug-1.c: Ditto. * gcc.target/riscv/compare-debug-2.c: Ditto. * gcc.target/riscv/rvv/base/pr116036.c: New test. 
Diff: --- gcc/config/riscv/riscv.cc | 5 + gcc/testsuite/gcc.target/riscv/arch-31.c | 2 +- gcc/testsuite/gcc.target/riscv/arch-32.c | 2 +- gcc/testsuite/gcc.target/riscv/arch-37.c | 2 +- gcc/testsuite/gcc.target/riscv/arch-38.c | 2 +- gcc/testsuite/gcc.target/riscv/compare-debug-1.c | 2 +- gcc/testsuite/gcc.target/riscv/compare-debug-2.c | 2 +- gcc/testsuite/gcc.target/riscv/predef-14.c| 6 +++--- gcc/testsuite/gcc.target/riscv/predef-15.c| 4 ++-- gcc/testsuite/gcc.target/riscv/predef-16.c| 4 ++-- gcc/testsuite/gcc.target/riscv/predef-26.c| 6 +- gcc/testsuite/gcc.target/riscv/predef-27.c| 6 +- gcc/testsuite/gcc.target/riscv/predef-32.c| 6 +- gcc/testsuite/gcc.target/riscv/predef-33.c| 6 +- gcc/testsuite/gcc.target/riscv/predef-36.c| 6 +- gcc/testsuite/gcc.target/riscv/predef-37.c| 6 +- gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111486.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c| 11 +++ 18 files changed, 60 insertions(+), 20 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 96c4ab65..2bb7f2aace1b 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -9691,6 +9691,11 @@ riscv_override_options_internal (struct gcc_options *opts) else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts)) error ("%<-mdiv%> requires %<-march%> to subsume the % extension"); + /* We might use a multiplication to calculate the scalable vector length at + runtime. Therefore, require the M extension. */ + if (TARGET_VECTOR && !TARGET_MUL) +sorry ("GCC's current % implementation requires the % extension"); + /* Likewise floating-point division and square root. 
*/ if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts)) && ((target_flags_explicit & MASK_FDIV) == 0)) diff --git a/gcc/testsuite/gcc.target/riscv/arch-31.c b/gcc/testsuite/gcc.target/riscv/arch-31.c index 5180753b9057..9b867c5ecd20 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-31.c +++ b/gcc/testsuite/gcc.target/riscv/arch-31.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32i_zvfbfmin -mabi=ilp32f" } */ +/* { dg-options "-march=rv32im_zvfbfmin -mabi=ilp32f" } */ int foo() { } diff --git a/gcc/testsuite/gcc.target/riscv/arch-32.c b/gcc/testsuite/gcc.target/riscv/arch-32.c index 496168325129..49a3db794892 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-32.c +++ b/gcc/testsuite/gcc.target/riscv/arch-32.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64iv_zvfbfmin -mabi=lp64d" } */ +/* { dg-options "-march=rv64imv_zvfbfmin -mabi=lp64d" } */ int foo() { } diff --git a/gcc/testsuite/gcc.target/riscv/arch-37.c b/gcc/testsuite/gcc.target/riscv/arch-37.c index 5b19a73c5567..b56ba77b973e 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-37.c +++ b/gcc/testsuite/gcc.target/riscv/arch-37.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32i_zvfbfwma -mabi=ilp32f" } */ +/* { dg-options "-march=rv32im_zvfbfwma -mabi=ilp32f" } */ int foo () {} diff --git a/gcc/testsuite/gcc.target/riscv/arch-38.c b/gcc/testsuite/gcc
[gcc r14-9972] RISC-V: Add VLS to mask vec_extract [PR114668].
https://gcc.gnu.org/g:02cc8f3e68f9af96d484d9946ceaa9e3eed38151 commit r14-9972-g02cc8f3e68f9af96d484d9946ceaa9e3eed38151 Author: Robin Dapp Date: Mon Apr 15 12:44:56 2024 +0200 RISC-V: Add VLS to mask vec_extract [PR114668]. This adds the missing VLS modes to the mask extract expanders. gcc/ChangeLog: PR target/114668 * config/riscv/autovec.md: Add VLS. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr114668.c: New test. Diff: --- gcc/config/riscv/autovec.md| 4 +-- .../gcc.target/riscv/rvv/autovec/pr114668.c| 35 ++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 3b32369f68c..aa1ae0fe075 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1427,7 +1427,7 @@ (define_expand "vec_extractqi" [(set (match_operand:QI0 "register_operand") (vec_select:QI - (match_operand:VB 1 "register_operand") + (match_operand:VB_VLS 1 "register_operand") (parallel [(match_operand 2 "nonmemory_operand")])))] "TARGET_VECTOR" @@ -1453,7 +1453,7 @@ (define_expand "vec_extractbi" [(set (match_operand:QI0 "register_operand") (vec_select:QI - (match_operand:VB 1 "register_operand") + (match_operand:VB_VLS 1 "register_operand") (parallel [(match_operand 2 "nonmemory_operand")])))] "TARGET_VECTOR" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114668.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114668.c new file mode 100644 index 000..3a13c3c0012 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114668.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v } */ +/* { dg-options { -O3 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d } } */ + +char a; +int b; +short e[14]; +char f[4][12544]; +_Bool c[4][5]; + +__attribute__ ((noipa)) +void foo (int a) +{ + if (a != 1) +__builtin_abort (); +} + +int main () +{ + for (int i = 0; i < 4; ++i) +for (int l = 0; l < 15; ++l) + for (int m = 0; m < 15; ++m) + f[i][l * m] = 3; + for (int j = 
0; j < 4; j += 1) +for (int k = 3; k < 13; k += 3) + for (_Bool l = 0; l < 1; l = 1) + for (int m = 0; m < 4; m += 1) + { + a = 0; + b -= e[k]; + c[j][m] = f[j][6]; + } + for (long i = 2; i < 4; ++i) +foo (c[3][3]); +}
[gcc r15-2337] RISC-V: Work around bare apostrophe in error string.
https://gcc.gnu.org/g:3f2bf415b447a0f6bc424c688b06e1f5946688a0 commit r15-2337-g3f2bf415b447a0f6bc424c688b06e1f5946688a0 Author: Robin Dapp Date: Fri Jul 26 12:58:38 2024 +0200 RISC-V: Work around bare apostrophe in error string. An unquoted apostrophe slipped through when testing the recent V/M extension patch. This, again, re-words the message to "Currently the 'V' implementation requires the 'M' extension". Going to commit as obvious after testing. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_override_options_internal): Reword error string without apostrophe. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr116036.c: Adjust expected error string. Diff: --- gcc/config/riscv/riscv.cc | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 2bb7f2aace1b..a490b9598b04 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -9694,7 +9694,7 @@ riscv_override_options_internal (struct gcc_options *opts) /* We might use a multiplication to calculate the scalable vector length at runtime. Therefore, require the M extension. */ if (TARGET_VECTOR && !TARGET_MUL) -sorry ("GCC's current %<V%> implementation requires the %<M%> extension"); +sorry ("Currently the %<V%> implementation requires the %<M%> extension"); /* Likewise floating-point division and square root. 
*/ if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts)) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c index a72209593f39..7b39291a91ad 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr116036.c @@ -8,4 +8,4 @@ void init() { a[i_0][i_1] = 1; } -/* { dg-excess-errors "sorry, unimplemented: GCC's current 'V' implementation requires the 'M' extension" } */ +/* { dg-excess-errors "sorry, unimplemented: Currently the 'V' implementation requires the 'M' extension" } */
[gcc r15-2649] RISC-V: Correct mode_idx attribute for viwalu wx variants [PR116149].
https://gcc.gnu.org/g:f15cd1802129454029f7fcc8ee3ddd56a86cdad8 commit r15-2649-gf15cd1802129454029f7fcc8ee3ddd56a86cdad8 Author: Robin Dapp Date: Wed Jul 31 16:54:03 2024 +0200 RISC-V: Correct mode_idx attribute for viwalu wx variants [PR116149]. In PR116149 we choose a wrong vector length which causes wrong values in a reduction. The problem happens in avlprop where we choose the number of units in the instruction's mode as vector length. For the non-scalar variants the respective operand has the correct non-widened mode. For the scalar variants, however, the same operand has a scalar mode which obviously only has one unit. This makes us choose VL = 1 leaving three elements undisturbed (so potentially -1). Those end up in the reduction causing the wrong result. This patch adjusts the mode_idx just for the scalar variants of the affected instruction patterns. gcc/ChangeLog: PR target/116149 * config/riscv/vector.md: Fix mode_idx attribute of scalar widen add/sub variants. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr116149.c: New test. 
Diff: --- gcc/config/riscv/vector.md| 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116149.c | 18 ++ 2 files changed, 20 insertions(+) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index bcedf3d79e26..d4d9bd87e91d 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -4016,6 +4016,7 @@ "TARGET_VECTOR" "vwadd.wx\t%0,%3,%z4%p1" [(set_attr "type" "viwalu") + (set_attr "mode_idx" "3") (set_attr "mode" "")]) (define_insn "@pred_single_widen_sub_extended_scalar" @@ -4038,6 +4039,7 @@ "TARGET_VECTOR" "vwsub.wx\t%0,%3,%z4%p1" [(set_attr "type" "viwalu") + (set_attr "mode_idx" "3") (set_attr "mode" "")]) (define_insn "@pred_widen_mulsu" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116149.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116149.c new file mode 100644 index ..4f5927b96fea --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116149.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl" } */ + +long a; +short b[6]; +short c[20]; +int main() { + for (short d = 0; d < 20; d += 3) { +c[d] = 0; +for (int e = 0; e < 20; e += 2) + for (int f = 1; f < 20; f += 2) +a += (unsigned)b[f + e]; + } + if (a != 0) +__builtin_abort (); +} + +/* { dg-final { scan-assembler-times "vsetivli\tzero,1" 0 } } */
[gcc r15-951] RISC-V: Do not allow v0 as dest when merging [PR115068].
https://gcc.gnu.org/g:a2fd0812a54cf51520f15e900df4cfb5874b75ed commit r15-951-ga2fd0812a54cf51520f15e900df4cfb5874b75ed Author: Robin Dapp Date: Mon May 13 13:49:57 2024 +0200 RISC-V: Do not allow v0 as dest when merging [PR115068]. This patch splits the vfw...wf pattern so we do not emit e.g. vfwadd.wf v0,v8,fa5,v0.t anymore. gcc/ChangeLog: PR target/115068 * config/riscv/vector.md: Split vfw.wf pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr115068-run.c: New test. * gcc.target/riscv/rvv/base/pr115068.c: New test. Diff: --- gcc/config/riscv/vector.md | 20 +++ .../gcc.target/riscv/rvv/base/pr115068-run.c | 28 + gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c | 29 ++ 3 files changed, 67 insertions(+), 10 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index c8c9667eaa2..92bbb8ce6ae 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -7178,24 +7178,24 @@ (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))]) (define_insn "@pred_single_widen__scalar" - [(set (match_operand:VWEXTF 0 "register_operand" "=vr, vr") + [(set (match_operand:VWEXTF 0 "register_operand""=vd, vd, vr, vr") (if_then_else:VWEXTF (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK") -(match_operand 6 "const_int_operand" "i, i") -(match_operand 7 "const_int_operand" "i, i") -(match_operand 8 "const_int_operand" "i, i") -(match_operand 9 "const_int_operand" "i, i") + [(match_operand: 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") +(match_operand 5 "vector_length_operand" " rK, rK, rK, rK") +(match_operand 6 "const_int_operand" " i, i, i, i") +(match_operand 7 "const_int_operand" " i, i, i, i") +(match_operand 8 "const_int_operand" " i, i, i, i") +(match_operand 9 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) (plus_minus:VWEXTF - (match_operand:VWEXTF 3 "register_operand" " vr, vr") + 
(match_operand:VWEXTF 3 "register_operand"" vr, vr, vr, vr") (float_extend:VWEXTF (vec_duplicate: - (match_operand: 4 "register_operand" "f, f" - (match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "register_operand" " f, f, f, f" + (match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vfw.wf\t%0,%3,%4%p1" [(set_attr "type" "vf") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068-run.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068-run.c new file mode 100644 index 000..95ec8e06021 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068-run.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-add-options riscv_v } */ +/* { dg-additional-options "-std=gnu99" } */ + +#include +#include + +vfloat64m8_t +test_vfwadd_wf_f64m8_m (vbool8_t vm, vfloat64m8_t vs2, float rs1, size_t vl) +{ + return __riscv_vfwadd_wf_f64m8_m (vm, vs2, rs1, vl); +} + +char global_memory[1024]; +void *fake_memory = (void *) global_memory; + +int +main () +{ + asm volatile ("fence" ::: "memory"); + vfloat64m8_t vfwadd_wf_f64m8_m_vd = test_vfwadd_wf_f64m8_m ( +__riscv_vreinterpret_v_i8m1_b8 (__riscv_vundefined_i8m1 ()), +__riscv_vundefined_f64m8 (), 1.0, __riscv_vsetvlmax_e64m8 ()); + asm volatile ("" ::"vr"(vfwadd_wf_f64m8_m_vd) : "memory"); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c new file mode 100644 index 000..6d680037aa1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-add-options riscv_v } */ +/* { dg-additional-options "-std=gnu99" } */ + +#include +#include + +vfloat64m8_t +test_vfwadd_wf_f64m8_m (vbool8_t vm, vfloat64m8_t vs2, float rs1, size_t vl) +{ + return __riscv_vfwadd_wf_f64m8_m (vm, vs2, rs1, vl); +} + +char global_memory[1024]; +void *fake_memory = (void *) global_memory; + +int +main () 
+{ + asm volatile ("fence" ::: "memory"); + vfloat64m8_t vfwadd_wf_f64m8_m_vd = test_vfwadd_wf
[gcc r15-952] RISC-V: Split vwadd.wx and vwsub.wx and add helpers.
https://gcc.gnu.org/g:9781885a624f3e29634d95c14cd10940cefb1a5a commit r15-952-g9781885a624f3e29634d95c14cd10940cefb1a5a Author: Robin Dapp Date: Thu May 16 12:43:43 2024 +0200 RISC-V: Split vwadd.wx and vwsub.wx and add helpers. vwadd.wx and vwsub.wx have the same problem vfwadd.wf had. This patch splits the insn pattern in the same way vfwadd.wf was split. It also adds two patterns to recognize extended scalars. In practice those do not provide a lot of improvement over what we already have but in some instances we can get rid of redundant extensions. gcc/ChangeLog: * config/riscv/vector.md: Split vwadd.wx/vwsub.wx pattern and add extended_scalar patterns. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr115068.c: Add vwadd.wx/vwsub.wx tests. * gcc.target/riscv/rvv/base/pr115068-run.c: Include pr115068.c. * gcc.target/riscv/rvv/base/vwaddsub-1.c: New test. Diff: --- gcc/config/riscv/vector.md | 62 ++ .../gcc.target/riscv/rvv/base/pr115068-run.c | 24 + gcc/testsuite/gcc.target/riscv/rvv/base/pr115068.c | 26 + .../gcc.target/riscv/rvv/base/vwaddsub-1.c | 48 + 4 files changed, 128 insertions(+), 32 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 92bbb8ce6ae..dccf76f0003 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3877,27 +3877,71 @@ (set_attr "mode" "")]) (define_insn "@pred_single_widen__scalar" - [(set (match_operand:VWEXTI 0 "register_operand" "=vr, vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=vd,vd, vr, vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") -(match_operand 5 "vector_length_operand" " rK, rK") -(match_operand 6 "const_int_operand" "i, i") -(match_operand 7 "const_int_operand" "i, i") -(match_operand 8 "const_int_operand" "i, i") + [(match_operand: 1 "vector_mask_operand" " vm,vm,Wc1,Wc1") +(match_operand 5 "vector_length_operand" " rK,rK, rK, rK") +(match_operand 6 "const_int_operand" " i, i, i, i") +(match_operand 7 
"const_int_operand" " i, i, i, i") +(match_operand 8 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (plus_minus:VWEXTI - (match_operand:VWEXTI 3 "register_operand" " vr, vr") + (match_operand:VWEXTI 3 "register_operand" " vr,vr, vr, vr") (any_extend:VWEXTI (vec_duplicate: - (match_operand: 4 "reg_or_0_operand" " rJ, rJ" - (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "reg_or_0_operand" " rJ,rJ, rJ, rJ" + (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vw.wx\t%0,%3,%z4%p1" [(set_attr "type" "vi") (set_attr "mode" "")]) +(define_insn "@pred_single_widen_add_extended_scalar" + [(set (match_operand:VWEXTI 0 "register_operand" "=vd,vd, vr, vr") + (if_then_else:VWEXTI + (unspec: + [(match_operand: 1 "vector_mask_operand" " vm,vm,Wc1,Wc1") +(match_operand 5 "vector_length_operand" " rK,rK, rK, rK") +(match_operand 6 "const_int_operand" " i, i, i, i") +(match_operand 7 "const_int_operand" " i, i, i, i") +(match_operand 8 "const_int_operand" " i, i, i, i") +(reg:SI VL_REGNUM) +(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:VWEXTI + (vec_duplicate:VWEXTI + (any_extend: + (match_operand: 4 "reg_or_0_operand" " rJ,rJ, rJ, rJ"))) + (match_operand:VWEXTI 3 "register_operand" " vr,vr, vr, vr")) + (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0, vu, 0")))] + "TARGET_VECTOR" + "vwadd.wx\t%0,%3,%z4%p1" + [(set_attr "type" "viwalu") + (set_attr "mode" "")]) + +(define_insn "@pred_single_widen_sub_extended_scalar" + [(set (match_operand:VWEXTI 0 "register_operand" "=vd,vd, vr, vr") + (if_then_else:VWEXTI + (unspec: + [(match_operand: 1 "vector_mask_operand" " vm,vm,Wc1,Wc1") +(match_operand 5 "vector_length_operand" " rK,rK, rK, rK") +(
[gcc r15-953] RISC-V: Add vwsll combine helpers.
https://gcc.gnu.org/g:af4bf422a699de0e7af5a26e02997d313e7301a6 commit r15-953-gaf4bf422a699de0e7af5a26e02997d313e7301a6 Author: Robin Dapp Date: Mon May 13 22:09:35 2024 +0200 RISC-V: Add vwsll combine helpers. This patch enables the usage of vwsll in autovec context by adding the necessary combine patterns and tests. gcc/ChangeLog: * config/riscv/autovec-opt.md (*vwsll_zext1_): New pattern. (*vwsll_zext2_): Ditto. (*vwsll_zext1_scalar_): Ditto. (*vwsll_zext1_trunc_): Ditto. (*vwsll_zext2_trunc_): Ditto. (*vwsll_zext1_trunc_scalar_): Ditto. * config/riscv/vector-crypto.md: Make pattern similar to other narrowing/widening patterns. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vwsll-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vwsll-run.c: New test. * gcc.target/riscv/rvv/autovec/binop/vwsll-template.h: New test. Diff: --- gcc/config/riscv/autovec-opt.md| 126 - gcc/config/riscv/vector-crypto.md | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vwsll-1.c | 10 ++ .../gcc.target/riscv/rvv/autovec/binop/vwsll-run.c | 67 +++ .../riscv/rvv/autovec/binop/vwsll-template.h | 49 5 files changed, 251 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 04f85d8e455..bc6af042bcf 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1467,5 +1467,127 @@ operands, operands[4]); DONE; } - [(set_attr "type" "vector")] -) + [(set_attr "type" "vector")]) + +;; vzext.vf2 + vsll = vwsll. 
+(define_insn_and_split "*vwsll_zext1_" + [(set (match_operand:VWEXTI 0"register_operand" "=vr ") + (ashift:VWEXTI + (zero_extend:VWEXTI + (match_operand: 1 "register_operand" " vr ")) + (match_operand: 2 "vector_shift_operand" "vrvk")))] + "TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { +insn_code icode = code_for_pred_vwsll (mode); +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); +DONE; + } + [(set_attr "type" "vwsll")]) + +(define_insn_and_split "*vwsll_zext2_" + [(set (match_operand:VWEXTI 0"register_operand" "=vr ") + (ashift:VWEXTI + (zero_extend:VWEXTI + (match_operand: 1 "register_operand" " vr ")) + (zero_extend:VWEXTI + (match_operand: 2 "vector_shift_operand" "vrvk"] + "TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { +insn_code icode = code_for_pred_vwsll (mode); +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); +DONE; + } + [(set_attr "type" "vwsll")]) + + +(define_insn_and_split "*vwsll_zext1_scalar_" + [(set (match_operand:VWEXTI 0"register_operand" "=vr") + (ashift:VWEXTI + (zero_extend:VWEXTI + (match_operand: 1 "register_operand"" vr")) + (match_operand:2 "vector_scalar_shift_operand" " rK")))] + "TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { +if (GET_CODE (operands[2]) == SUBREG) + operands[2] = SUBREG_REG (operands[2]); +insn_code icode = code_for_pred_vwsll_scalar (mode); +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); +DONE; + } + [(set_attr "type" "vwsll")]) + +;; For +;; uint16_t dst; +;; uint8_t a, b; +;; dst = vwsll (a, b) +;; we seem to create +;; aa = (int) a; +;; bb = (int) b; +;; dst = (short) vwsll (aa, bb); +;; The following patterns help to combine this idiom into one vwsll. 
+ +(define_insn_and_split "*vwsll_zext1_trunc_" + [(set (match_operand: 0 "register_operand""=vr ") +(truncate: + (ashift:VQEXTI + (zero_extend:VQEXTI + (match_operand: 1 "register_operand" " vr ")) + (match_operand:VQEXTI 2 "vector_shift_operand" "vrvk"] + "TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { +insn_code icode = code_for_pred_vwsll (mode); +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); +DONE; + } + [(set_attr "type" "vwsll")]) + +(define_insn_and_split "*vwsll_zext2_trunc_" + [(set (match_operand: 0 "register_operand""=vr ") +(truncate: + (ashift:VQEXTI + (zero_extend:VQEXTI + (match_operand: 1 "register_operand" " vr ")) + (zero_extend:VQEXTI + (match_operand: 2 "vector_shift_operand" "vrvk")] + "TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { +insn_code icode = code_for_pred_vwsll (mode); +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY
[gcc r15-954] RISC-V: Use widening shift for scatter/gather if applicable.
https://gcc.gnu.org/g:309ee005aa871286c8daccbce7586f82be347440 commit r15-954-g309ee005aa871286c8daccbce7586f82be347440 Author: Robin Dapp Date: Fri May 10 13:37:03 2024 +0200 RISC-V: Use widening shift for scatter/gather if applicable. With the zvbb extension we can emit a widening shift for scatter/gather index preparation in case we need to multiply by 2 and zero extend. The patch also adds vwsll to the mode_idx attribute and removes the mode from shift-count operand of the insn pattern. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_gather_scatter): Use vwsll if applicable. * config/riscv/vector-crypto.md: Remove mode from vwsll shift count operator. * config/riscv/vector.md: Add vwsll to mode iterator. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Add zvbb. * gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c: New test. Diff: --- gcc/config/riscv/riscv-v.cc| 42 +--- gcc/config/riscv/vector-crypto.md | 4 +- gcc/config/riscv/vector.md | 4 +- .../gather-scatter/gather_load_64-12-zvbb.c| 113 + gcc/testsuite/lib/target-supports.exp | 48 - 5 files changed, 193 insertions(+), 18 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index f105f470495..9428beca268 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4016,7 +4016,7 @@ expand_gather_scatter (rtx *ops, bool is_load) { rtx ptr, vec_offset, vec_reg; bool zero_extend_p; - int scale_log2; + int shift; rtx mask = ops[5]; rtx len = ops[6]; if (is_load) @@ -4025,7 +4025,7 @@ expand_gather_scatter (rtx *ops, bool is_load) ptr = ops[1]; vec_offset = ops[2]; zero_extend_p = INTVAL (ops[3]); - scale_log2 = exact_log2 (INTVAL (ops[4])); + shift = exact_log2 (INTVAL (ops[4])); } else { @@ -4033,7 +4033,7 @@ expand_gather_scatter (rtx *ops, bool is_load) ptr = ops[0]; vec_offset = ops[1]; zero_extend_p = INTVAL (ops[2]); - scale_log2 = exact_log2 (INTVAL (ops[3])); + shift = exact_log2 (INTVAL (ops[3])); } machine_mode vec_mode = GET_MODE 
(vec_reg); @@ -4043,9 +4043,12 @@ expand_gather_scatter (rtx *ops, bool is_load) poly_int64 nunits = GET_MODE_NUNITS (vec_mode); bool is_vlmax = is_vlmax_len_p (vec_mode, len); + bool use_widening_shift = false; + /* Extend the offset element to address width. */ if (inner_offsize < BITS_PER_WORD) { + use_widening_shift = TARGET_ZVBB && zero_extend_p && shift == 1; /* 7.2. Vector Load/Store Addressing Modes. If the vector offset elements are narrower than XLEN, they are zero-extended to XLEN before adding to the ptr effective address. If @@ -4054,8 +4057,8 @@ expand_gather_scatter (rtx *ops, bool is_load) raise an illegal instruction exception if the EEW is not supported for offset elements. -RVV spec only refers to the scale_log == 0 case. */ - if (!zero_extend_p || scale_log2 != 0) +RVV spec only refers to the shift == 0 case. */ + if (!zero_extend_p || shift) { if (zero_extend_p) inner_idx_mode @@ -4064,19 +4067,32 @@ expand_gather_scatter (rtx *ops, bool is_load) inner_idx_mode = int_mode_for_size (BITS_PER_WORD, 0).require (); machine_mode new_idx_mode = get_vector_mode (inner_idx_mode, nunits).require (); - rtx tmp = gen_reg_rtx (new_idx_mode); - emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode, - zero_extend_p ? true : false)); - vec_offset = tmp; + if (!use_widening_shift) + { + rtx tmp = gen_reg_rtx (new_idx_mode); + emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode, + zero_extend_p ? 
true : false)); + vec_offset = tmp; + } idx_mode = new_idx_mode; } } - if (scale_log2 != 0) + if (shift) { - rtx tmp = expand_binop (idx_mode, ashl_optab, vec_offset, - gen_int_mode (scale_log2, Pmode), NULL_RTX, 0, - OPTAB_DIRECT); + rtx tmp; + if (!use_widening_shift) + tmp = expand_binop (idx_mode, ashl_optab, vec_offset, + gen_int_mode (shift, Pmode), NULL_RTX, 0, + OPTAB_DIRECT); + else + { + tmp = gen_reg_rtx (idx_mode); + insn_code icode = code_for_pred_vwsll_scalar (idx_mode); + rtx ops[] = {tmp, vec_offset, const1_rtx}; + emit_vlmax_insn (icode, BINARY_OP, ops); + } + vec_offset = tmp; } diff -
[gcc r15-955] RISC-V: Add vandn combine helper.
https://gcc.gnu.org/g:f48448276f29a3823827292c72b7fc8e9cd39e1e commit r15-955-gf48448276f29a3823827292c72b7fc8e9cd39e1e Author: Robin Dapp Date: Wed May 15 15:01:35 2024 +0200 RISC-V: Add vandn combine helper. This patch adds a combine pattern for vandn as well as tests for it. gcc/ChangeLog: * config/riscv/autovec-opt.md (*vandn_<mode>): New pattern. * config/riscv/vector.md: Add vandn to mode_idx. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vandn-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vandn-run.c: New test. * gcc.target/riscv/rvv/autovec/binop/vandn-template.h: New test. Diff: --- gcc/config/riscv/autovec-opt.md| 18 gcc/config/riscv/vector.md | 2 +- .../gcc.target/riscv/rvv/autovec/binop/vandn-1.c | 8 .../gcc.target/riscv/rvv/autovec/binop/vandn-run.c | 54 ++ .../riscv/rvv/autovec/binop/vandn-template.h | 38 +++ 5 files changed, 119 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index bc6af042bcf..6a2eabbd854 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1591,3 +1591,21 @@ DONE; } [(set_attr "type" "vwsll")]) + +;; vnot + vand = vandn. 
+(define_insn_and_split "*vandn_" + [(set (match_operand:V_VLSI 0 "register_operand" "=vr") + (and:V_VLSI +(not:V_VLSI + (match_operand:V_VLSI 2 "register_operand" "vr")) +(match_operand:V_VLSI1 "register_operand" "vr")))] + "TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { +insn_code icode = code_for_pred_vandn (mode); +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); +DONE; + } + [(set_attr "type" "vandn")]) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 69423be6917..c15af17ec62 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -743,7 +743,7 @@ vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,\ vfcvtitof,vfncvtitof,vfncvtftoi,vfncvtftof,vmalu,vmiota,vmidx,\ vimovxv,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,\ - vgather,vcompress,vmov,vnclip,vnshift") + vgather,vcompress,vmov,vnclip,vnshift,vandn") (const_int 0) (eq_attr "type" "vimovvx,vfmovvf") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c new file mode 100644 index 000..3bb5bf8dd5b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvbb" } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */ + +#include "vandn-template.h" + +/* { dg-final { scan-assembler-times {\tvandn\.vv} 8 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-run.c new file mode 100644 index 000..243c5975068 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vandn-run.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-require-effective-target "riscv_zvbb_ok" } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvbb" } */ +/* { dg-additional-options "-std=c99 
-fno-vect-cost-model" } */ + +#include "vandn-template.h" + +#include + +#define SZ 512 + +#define RUN(TYPE, VAL) \ + TYPE a##TYPE[SZ]; \ + TYPE b##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ +{ \ + a##TYPE[i] = 123; \ + b##TYPE[i] = VAL; \ +} \ + vandn_##TYPE (a##TYPE, a##TYPE, b##TYPE, SZ); \ + for (int i = 0; i < SZ; i++) \ +assert (a##TYPE[i] == (TYPE) (123 & ~VAL)); + +#define RUN2(TYPE, VAL) \ + TYPE as##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ +as##TYPE[i] = 123; \ + vandns_##TYPE (as##TYPE, as##TYPE, VAL, SZ);
[gcc r15-956] RISC-V: Add vector popcount, clz, ctz.
https://gcc.gnu.org/g:6fa4b0135439d64c0ea1816594d7dc830e836376 commit r15-956-g6fa4b0135439d64c0ea1816594d7dc830e836376 Author: Robin Dapp Date: Wed May 15 17:41:07 2024 +0200 RISC-V: Add vector popcount, clz, ctz. This patch adds the zvbb vcpop, vclz and vctz to the autovec machinery as well as tests for them. gcc/ChangeLog: * config/riscv/autovec.md (ctz<mode>2): New expander. (clz<mode>2): Ditto. * config/riscv/generic-vector-ooo.md: Add bitmanip ops to insn reservation. * config/riscv/vector-crypto.md: Add VLS modes to insns. * config/riscv/vector.md: Add bitmanip ops to mode_idx and other attributes. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/popcount-1.c: Adjust check for zvbb. * gcc.target/riscv/rvv/autovec/unop/popcount-run-1.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/popcount-2.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/popcount-3.c: New test. * gcc.target/riscv/rvv/autovec/unop/popcount-template.h: New test. * gcc.target/riscv/rvv/autovec/unop/clz-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/clz-run.c: New test. * gcc.target/riscv/rvv/autovec/unop/clz-template.h: New test. * gcc.target/riscv/rvv/autovec/unop/ctz-1.c: New test. * gcc.target/riscv/rvv/autovec/unop/ctz-run.c: New test. * gcc.target/riscv/rvv/autovec/unop/ctz-template.h: New test. 
Diff: --- gcc/config/riscv/autovec.md| 30 - gcc/config/riscv/generic-vector-ooo.md | 2 +- gcc/config/riscv/vector-crypto.md | 137 +++-- gcc/config/riscv/vector.md | 14 +-- .../gcc.target/riscv/rvv/autovec/unop/clz-1.c | 8 ++ .../gcc.target/riscv/rvv/autovec/unop/clz-run.c| 36 ++ .../riscv/rvv/autovec/unop/clz-template.h | 21 .../gcc.target/riscv/rvv/autovec/unop/ctz-1.c | 8 ++ .../gcc.target/riscv/rvv/autovec/unop/ctz-run.c| 36 ++ .../riscv/rvv/autovec/unop/ctz-template.h | 21 .../gcc.target/riscv/rvv/autovec/unop/popcount-1.c | 4 +- .../gcc.target/riscv/rvv/autovec/unop/popcount-2.c | 4 +- .../gcc.target/riscv/rvv/autovec/unop/popcount-3.c | 8 ++ .../riscv/rvv/autovec/unop/popcount-run-1.c| 3 +- .../riscv/rvv/autovec/unop/popcount-template.h | 21 15 files changed, 272 insertions(+), 81 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 87d4171bc89..15db26d52c6 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1566,7 +1566,7 @@ }) ;; --- -;; - [INT] POPCOUNT. +;; - [INT] POPCOUNT, CTZ and CLZ. 
;; --- (define_expand "popcount2" @@ -1574,10 +1574,36 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_popcount (operands); + if (!TARGET_ZVBB) +riscv_vector::expand_popcount (operands); + else +{ + riscv_vector::emit_vlmax_insn (code_for_pred_v (POPCOUNT, mode), +riscv_vector::CPOP_OP, operands); +} DONE; }) +(define_expand "ctz2" + [(match_operand:V_VLSI 0 "register_operand") + (match_operand:V_VLSI 1 "register_operand")] + "TARGET_ZVBB" + { +riscv_vector::emit_vlmax_insn (code_for_pred_v (CTZ, mode), + riscv_vector::CPOP_OP, operands); +DONE; +}) + +(define_expand "clz2" + [(match_operand:V_VLSI 0 "register_operand") + (match_operand:V_VLSI 1 "register_operand")] + "TARGET_ZVBB" + { +riscv_vector::emit_vlmax_insn (code_for_pred_v (CLZ, mode), + riscv_vector::CPOP_OP, operands); +DONE; +}) + ;; - ;; [INT] Highpart multiplication diff --git a/gcc/config/riscv/generic-vector-ooo.md b/gcc/config/riscv/generic-vector-ooo.md index 96cb1a0be29..5e933c83841 100644 --- a/gcc/config/riscv/generic-vector-ooo.md +++ b/gcc/config/riscv/generic-vector-ooo.md @@ -74,7 +74,7 @@ ;; Vector crypto, assumed to be a generic operation for now. (define_insn_reservation "vec_crypto" 4 - (eq_attr "type" "crypto") + (eq_attr "type" "crypto,vclz,vctz,vcpop") "vxu_ooo_issue,vxu_ooo_alu") ;; Vector crypto, AES diff --git a/gcc/config/riscv/vector-crypto.md b/gcc/config/riscv/vector-crypto.md index 0ddc2f3f3c6..17432b15815 100755 --- a/gcc/config/riscv/vector-crypto.md +++ b/gcc/config/riscv/vector-crypto.md @@ -99,42 +99,43 @@ ;; vror.vv vror.vx vror.vi ;; vwsll.vv vwsll.vx vwsll.vi (define_insn "@pred_vandn" - [(set (match_operand:VI 0 "register_operand" "=vd, vr, vd, v
[gcc r15-957] RISC-V: Remove dead perm series code and document.
https://gcc.gnu.org/g:30cfdd6ff56972d9d1b9dbdd43a8333c85618775 commit r15-957-g30cfdd6ff56972d9d1b9dbdd43a8333c85618775 Author: Robin Dapp Date: Fri May 17 12:48:52 2024 +0200 RISC-V: Remove dead perm series code and document. With the introduction of shuffle_series_patterns the explicit handler code for a perm series is dead. This patch removes it and also adds a function-level comment to shuffle_series_patterns. gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vector): Document. (shuffle_extract_and_slide1up_patterns): Remove. Diff: --- gcc/config/riscv/riscv-v.cc | 26 -- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 9428beca268..948aaf7d8dd 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1485,28 +1485,6 @@ expand_const_vector (rtx target, rtx src) emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops); } } - else if (npatterns == 1 && nelts_per_pattern == 3) - { - /* Generate the following CONST_VECTOR: -{ base0, base1, base1 + step, base1 + step * 2, ... } */ - rtx base0 = builder.elt (0); - rtx base1 = builder.elt (1); - rtx base2 = builder.elt (2); - - rtx step = simplify_binary_operation (MINUS, builder.inner_mode (), - base2, base1); - - /* Step 1 - { base1, base1 + step, base1 + step * 2, ... } */ - rtx tmp = gen_reg_rtx (mode); - expand_vec_series (tmp, base1, step); - /* Step 2 - { base0, base1, base1 + step, base1 + step * 2, ... } */ - if (!rtx_equal_p (base0, const0_rtx)) - base0 = force_reg (builder.inner_mode (), base0); - - insn_code icode = optab_handler (vec_shl_insert_optab, mode); - gcc_assert (icode != CODE_FOR_nothing); - emit_insn (GEN_FCN (icode) (target, tmp, base0)); - } else /* TODO: We will enable more variable-length vector in the future. 
*/ gcc_unreachable (); @@ -3580,6 +3558,10 @@ shuffle_extract_and_slide1up_patterns (struct expand_vec_perm_d *d) return true; } +/* This looks for a series pattern in the provided vector permute structure D. + If successful it emits a series insn as well as a gather to implement it. + Return true if successful, false otherwise. */ + static bool shuffle_series_patterns (struct expand_vec_perm_d *d) {
[gcc r15-1042] RISC-V: Introduce -mvector-strict-align.
https://gcc.gnu.org/g:68b0742a49de7122d5023f0bf46460ff2fb3e3dd commit r15-1042-g68b0742a49de7122d5023f0bf46460ff2fb3e3dd Author: Robin Dapp Date: Tue May 28 21:19:26 2024 +0200 RISC-V: Introduce -mvector-strict-align. This patch disables movmisalign by default and introduces the -mno-vector-strict-align option to override it and re-enable movmisalign. For now, generic-ooo is the only uarch that supports misaligned vector access. The patch also adds a check_effective_target_riscv_v_misalign_ok to the testsuite which enables or disables the vector misalignment tests depending on whether the target under test can execute a misaligned vle32. Changes from v3: - Addressed Kito's comments. - Made -mscalar-strict-align a real alias. gcc/ChangeLog: * config/riscv/riscv-opts.h (TARGET_VECTOR_MISALIGN_SUPPORTED): Move from here... * config/riscv/riscv.h (TARGET_VECTOR_MISALIGN_SUPPORTED): ...to here and map to riscv_vector_unaligned_access_p. * config/riscv/riscv.opt: Add -mvector-strict-align. * config/riscv/riscv.cc (struct riscv_tune_param): Add vector_unaligned_access. (riscv_override_options_internal): Set riscv_vector_unaligned_access_p. * doc/invoke.texi: Document -mvector-strict-align. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Add check_effective_target_riscv_v_misalign_ok. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-7.c: Add -mno-vector-strict-align. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto. 
Diff: --- gcc/config/riscv/riscv-opts.h | 3 -- gcc/config/riscv/riscv.cc | 19 gcc/config/riscv/riscv.h | 5 gcc/config/riscv/riscv.opt | 8 + gcc/doc/invoke.texi| 17 +++ .../vect/costmodel/riscv/rvv/dynamic-lmul2-7.c | 2 +- .../vect/costmodel/riscv/rvv/vla_vs_vls-10.c | 2 +- .../vect/costmodel/riscv/rvv/vla_vs_vls-11.c | 2 +- .../vect/costmodel/riscv/rvv/vla_vs_vls-12.c | 2 +- .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c | 2 +- .../gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c | 2 +- .../gcc.target/riscv/rvv/autovec/vls/misalign-1.c | 2 +- gcc/testsuite/lib/target-supports.exp | 35 -- 13 files changed, 89 insertions(+), 12 deletions(-) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 1b2dd5757a8..f58a07abffc 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -147,9 +147,6 @@ enum rvv_vector_bits_enum { ? 0 \ : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1)) -/* TODO: Enable RVV movmisalign by default for now. */ -#define TARGET_VECTOR_MISALIGN_SUPPORTED 1 - /* The maximmum LMUL according to user configuration. */ #define TARGET_MAX_LMUL \ (int) (rvv_max_lmul == RVV_DYNAMIC ? RVV_M8 : rvv_max_lmul) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index c5c4c777349..9704ff9c6a0 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -288,6 +288,7 @@ struct riscv_tune_param unsigned short memory_cost; unsigned short fmv_cost; bool slow_unaligned_access; + bool vector_unaligned_access; bool use_divmod_expansion; bool overlap_op_by_pieces; unsigned int fusible_ops; @@ -300,6 +301,10 @@ struct riscv_tune_param /* Whether unaligned accesses execute very slowly. */ bool riscv_slow_unaligned_access_p; +/* Whether misaligned vector accesses are supported (i.e. do not + throw an exception). */ +bool riscv_vector_unaligned_access_p; + /* Whether user explicitly passed -mstrict-align. 
*/ bool riscv_user_wants_strict_align; @@ -442,6 +447,7 @@ static const struct riscv_tune_param rocket_tune_info = { 5, /* memory_cost */ 8, /* fmv_cost */ true,/* slow_unaligned_access */ + false, /* vector_unaligned_access */ false, /* use_divmod_expansion */ false, /* overlap_op_by_pieces */ RISCV_FUSE_NOTHING,
[gcc r15-1043] check_GNU_style: Use raw strings.
https://gcc.gnu.org/g:03e1a7270314800eb33632f778401570e65345bd commit r15-1043-g03e1a7270314800eb33632f778401570e65345bd Author: Robin Dapp Date: Mon May 13 22:05:57 2024 +0200 check_GNU_style: Use raw strings. This silences some warnings when using check_GNU_style. contrib/ChangeLog: * check_GNU_style_lib.py: Use raw strings for regexps. Diff: --- contrib/check_GNU_style_lib.py | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/contrib/check_GNU_style_lib.py b/contrib/check_GNU_style_lib.py index f1a120fa8d3..6dbe4b53559 100755 --- a/contrib/check_GNU_style_lib.py +++ b/contrib/check_GNU_style_lib.py @@ -103,7 +103,7 @@ class SpacesCheck: class SpacesAndTabsMixedCheck: def __init__(self): -self.re = re.compile('\ \t') +self.re = re.compile(r'\ \t') def check(self, filename, lineno, line): stripped = line.lstrip() @@ -115,7 +115,7 @@ class SpacesAndTabsMixedCheck: class TrailingWhitespaceCheck: def __init__(self): -self.re = re.compile('(\s+)$') +self.re = re.compile(r'(\s+)$') def check(self, filename, lineno, line): assert(len(line) == 0 or line[-1] != '\n') @@ -128,7 +128,7 @@ class TrailingWhitespaceCheck: class SentenceSeparatorCheck: def __init__(self): -self.re = re.compile('\w\.(\s|\s{3,})\w') +self.re = re.compile(r'\w\.(\s|\s{3,})\w') def check(self, filename, lineno, line): m = self.re.search(line) @@ -140,7 +140,7 @@ class SentenceSeparatorCheck: class SentenceEndOfCommentCheck: def __init__(self): -self.re = re.compile('\w\.(\s{0,1}|\s{3,})\*/') +self.re = re.compile(r'\w\.(\s{0,1}|\s{3,})\*/') def check(self, filename, lineno, line): m = self.re.search(line) @@ -152,7 +152,7 @@ class SentenceEndOfCommentCheck: class SentenceDotEndCheck: def __init__(self): -self.re = re.compile('\w(\s*\*/)') +self.re = re.compile(r'\w(\s*\*/)') def check(self, filename, lineno, line): m = self.re.search(line) @@ -164,7 +164,7 @@ class SentenceDotEndCheck: class FunctionParenthesisCheck: # TODO: filter out GTY stuff def __init__(self): -self.re = 
re.compile('\w(\s{2,})?(\()') +self.re = re.compile(r'\w(\s{2,})?(\()') def check(self, filename, lineno, line): if '#define' in line: @@ -179,7 +179,7 @@ class FunctionParenthesisCheck: class SquareBracketCheck: def __init__(self): -self.re = re.compile('\w\s+(\[)') +self.re = re.compile(r'\w\s+(\[)') def check(self, filename, lineno, line): if filename.endswith('.md'): @@ -194,7 +194,7 @@ class SquareBracketCheck: class ClosingParenthesisCheck: def __init__(self): -self.re = re.compile('\S\s+(\))') +self.re = re.compile(r'\S\s+(\))') def check(self, filename, lineno, line): m = self.re.search(line) @@ -208,7 +208,7 @@ class BracesOnSeparateLineCheck: # This will give false positives for C99 compound literals. def __init__(self): -self.re = re.compile('(\)|else)\s*({)') +self.re = re.compile(r'(\)|else)\s*({)') def check(self, filename, lineno, line): m = self.re.search(line) @@ -219,7 +219,7 @@ class BracesOnSeparateLineCheck: class TrailinigOperatorCheck: def __init__(self): -regex = '^\s.*(([^a-zA-Z_]\*)|([-%<=&|^?])|([^*]/)|([^:][+]))$' +regex = r'^\s.*(([^a-zA-Z_]\*)|([-%<=&|^?])|([^*]/)|([^:][+]))$' self.re = re.compile(regex) def check(self, filename, lineno, line):
[gcc r15-1061] RISC-V: Regenerate opt urls.
https://gcc.gnu.org/g:037fc4d1012dc9d533862ef7e2c946249877dd71 commit r15-1061-g037fc4d1012dc9d533862ef7e2c946249877dd71 Author: Robin Dapp Date: Thu Jun 6 09:32:28 2024 +0200 RISC-V: Regenerate opt urls. I wasn't aware that I needed to regenerate the opt urls when adding an option. This patch does that. gcc/ChangeLog: * config/riscv/riscv.opt.urls: Regenerate. Diff: --- gcc/config/riscv/riscv.opt.urls | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/config/riscv/riscv.opt.urls b/gcc/config/riscv/riscv.opt.urls index d87e9d5c9a8..622cb6e7b44 100644 --- a/gcc/config/riscv/riscv.opt.urls +++ b/gcc/config/riscv/riscv.opt.urls @@ -47,6 +47,12 @@ UrlSuffix(gcc/RISC-V-Options.html#index-mcmodel_003d-4) mstrict-align UrlSuffix(gcc/RISC-V-Options.html#index-mstrict-align-4) +mscalar-strict-align +UrlSuffix(gcc/RISC-V-Options.html#index-mscalar-strict-align) + +mvector-strict-align +UrlSuffix(gcc/RISC-V-Options.html#index-mvector-strict-align) + ; skipping UrlSuffix for 'mexplicit-relocs' due to finding no URLs mrelax
[gcc r15-1187] vect: Merge loop mask and cond_op mask in fold-left reduction [PR115382].
https://gcc.gnu.org/g:2b438a0d2aa80f051a09b245a58f643540d4004b commit r15-1187-g2b438a0d2aa80f051a09b245a58f643540d4004b Author: Robin Dapp Date: Fri Jun 7 14:36:41 2024 +0200 vect: Merge loop mask and cond_op mask in fold-left reduction [PR115382]. Currently we discard the cond-op mask when the loop is fully masked which causes wrong code in gcc.dg/vect/vect-cond-reduc-in-order-2-signed-zero.c when compiled with -O3 -march=cascadelake --param vect-partial-vector-usage=2. This patch ANDs both masks. gcc/ChangeLog: PR tree-optimization/115382 * tree-vect-loop.cc (vectorize_fold_left_reduction): Use prepare_vec_mask. * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Remove static of prepare_vec_mask. * tree-vectorizer.h (prepare_vec_mask): Export. Diff: --- gcc/tree-vect-loop.cc | 10 +- gcc/tree-vect-stmts.cc | 2 +- gcc/tree-vectorizer.h | 3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index c471f1564a72..5b1ad06eca66 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -7204,7 +7204,15 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, tree len = NULL_TREE; tree bias = NULL_TREE; if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) - mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i); + { + tree loop_mask = vect_get_loop_mask (loop_vinfo, gsi, masks, + vec_num, vectype_in, i); + if (is_cond_op) + mask = prepare_vec_mask (loop_vinfo, TREE_TYPE (loop_mask), +loop_mask, vec_opmask[i], gsi); + else + mask = loop_mask; + } else if (is_cond_op) mask = vec_opmask[i]; if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 05a169ecb2dd..831f18253765 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -1643,7 +1643,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, MASK_TYPE is the type of both masks. If new statements are needed, insert them before GSI. 
*/ -static tree +tree prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask, tree vec_mask, gimple_stmt_iterator *gsi) { diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 97ec9c341e7d..6bb0f5c3a56f 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2508,6 +2508,9 @@ extern void vect_free_slp_tree (slp_tree); extern bool compatible_calls_p (gcall *, gcall *); extern int vect_slp_child_index_for_operand (const gimple *, int op, bool); +extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree, + gimple_stmt_iterator *); + /* In tree-vect-patterns.cc. */ extern void vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree);
[gcc r15-4378] RISC-V: Use biggest_mode as mode for constants.
https://gcc.gnu.org/g:cc217a1ecb04c9234b2cce7ba3c27701a050e402 commit r15-4378-gcc217a1ecb04c9234b2cce7ba3c27701a050e402 Author: Robin Dapp Date: Tue Oct 15 12:10:48 2024 +0200 RISC-V: Use biggest_mode as mode for constants. In compute_nregs_for_mode we expect that the current variable's mode is at most as large as the biggest mode to be used for vectorization. This might not be true for constants as they don't actually have a mode. In that case, just use the biggest mode so max_number_of_live_regs returns 1. This fixes several test cases in the test suite. gcc/ChangeLog: PR target/116655 * config/riscv/riscv-vector-costs.cc (max_number_of_live_regs): Use biggest mode instead of constant's saved mode. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr116655.c: New test. Diff: --- gcc/config/riscv/riscv-vector-costs.cc| 14 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c | 11 +++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 25570bd40040..67b9e3e8f413 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -194,7 +194,7 @@ compute_local_program_points ( /* Collect the stmts that is vectorized and mark their program point. */ for (i = 0; i < nbbs; i++) { - int point = 1; + unsigned int point = 1; basic_block bb = bbs[i]; vec program_points = vNULL; if (dump_enabled_p ()) @@ -489,9 +489,15 @@ max_number_of_live_regs (loop_vec_info loop_vinfo, const basic_block bb, pair live_range = (*iter).second; for (i = live_range.first + 1; i <= live_range.second; i++) { - machine_mode mode = TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE - ? BImode - : TYPE_MODE (TREE_TYPE (var)); + machine_mode mode; + if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE) + mode = BImode; + /* Constants do not have a mode, just use the biggest so +compute_nregs will return 1. 
*/ + else if (TREE_CODE (var) == INTEGER_CST) + mode = biggest_mode; + else + mode = TYPE_MODE (TREE_TYPE (var)); unsigned int nregs = compute_nregs_for_mode (loop_vinfo, mode, biggest_mode, lmul); live_vars_vec[i] += nregs; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c new file mode 100644 index ..36768e37d005 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr116655.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64imv -mabi=lp64d -mrvv-max-lmul=dynamic" } */ + +short a[5]; +int b() { + int c = 0; + for (; c <= 4; c++) +if (a[c]) + break; + return c; +}
[gcc r15-3829] RISC-V: Add more vector-vector extract cases.
https://gcc.gnu.org/g:be50c763a07893416419b82538f259f43e0773d4 commit r15-3829-gbe50c763a07893416419b82538f259f43e0773d4 Author: Robin Dapp Date: Tue Sep 3 17:53:34 2024 +0200 RISC-V: Add more vector-vector extract cases. This adds a V16SI -> V4SI and related i.e. "quartering" vector-vector extract expander for VLS modes. It helps with spills in x264 that may cause a load-hit-store. gcc/ChangeLog: * config/riscv/autovec.md (vec_extract): Add quarter vec-vec extract. * config/riscv/vector-iterators.md: New iterators. Diff: --- gcc/config/riscv/autovec.md | 28 ++ gcc/config/riscv/vector-iterators.md | 184 +++ 2 files changed, 212 insertions(+) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index a53c44659f0f..836cdd4491f6 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1488,6 +1488,34 @@ DONE; }) +(define_expand "vec_extract" + [(set (match_operand:0 "nonimmediate_operand") + (vec_select: + (match_operand:VLS_HAS_QUARTER 1 "register_operand") + (parallel +[(match_operand 2 "immediate_operand")])))] + "TARGET_VECTOR" +{ + int sz = GET_MODE_NUNITS (mode).to_constant (); + int part = INTVAL (operands[2]); + + rtx start = GEN_INT (part * sz); + rtx tmp = operands[1]; + + if (part != 0) +{ + tmp = gen_reg_rtx (mode); + + rtx ops[] = {tmp, operands[1], start}; + riscv_vector::emit_vlmax_insn + (code_for_pred_slide (UNSPEC_VSLIDEDOWN, mode), +riscv_vector::BINARY_OP, ops); +} + + emit_move_insn (operands[0], gen_lowpart (mode, tmp)); + DONE; +}) + ;; - ;; [FP] Binary operations ;; - diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index a00b5c3feddd..43325d1ba87a 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -4328,3 +4328,187 @@ (V256DF "v128df") (V512DF "v256df") ]) + +(define_mode_iterator VLS_HAS_QUARTER [ + (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)") + (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)") + (V16QI 
"riscv_vector::vls_mode_valid_p (V16QImode)") + (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)") + (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)") + (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)") + (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)") + (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)") + (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64") + (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64") + (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64") + (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32") + (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && TARGET_VECTOR_ELEN_FP_32") + (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64") + (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && TARGET_VECTOR_ELEN_FP_64") + (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64") + (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") + (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)") + (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64") + (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 128") + (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 256") + (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 512") + (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 1024") + (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 2048") + (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 4096") + (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64") + (V64HI 
"riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 128") + (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 256") + (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 512") + (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 1024") + (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 2048") + (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 4096") + (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 128") + (V64SI "riscv
[gcc r15-3828] RISC-V: Fix effective target check.
https://gcc.gnu.org/g:e45537f56250f19cdf2ec09a744c6b11170c1001 commit r15-3828-ge45537f56250f19cdf2ec09a744c6b11170c1001 Author: Robin Dapp Date: Fri Aug 30 14:35:08 2024 +0200 RISC-V: Fix effective target check. The return value is inverted in check_effective_target_rvv_zvl256b_ok and check_effective_target_rvv_zvl512b_ok. Fix this and also just use the current march. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Fix effective target check. Diff: --- gcc/testsuite/lib/target-supports.exp | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 8f2afe866c7c..05a63c4e9a55 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1978,15 +1978,15 @@ proc check_effective_target_riscv_v { } { proc check_effective_target_rvv_zvl256b_ok { } { # Check if the target has a VLENB of 32. -set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] +set gcc_march [riscv_get_arch] return [check_runtime ${gcc_march}_exec { int main() { int vlenb = 0; asm ("csrr %0,vlenb" : "=r" (vlenb) : : ); if (vlenb == 32) - return 1; - return 0; + return 0; + return 1; } } "-march=${gcc_march}"] } @@ -1996,15 +1996,15 @@ proc check_effective_target_rvv_zvl256b_ok { } { proc check_effective_target_rvv_zvl512b_ok { } { # Check if the target has a VLENB of 64. -set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] +set gcc_march [riscv_get_arch] return [check_runtime ${gcc_march}_exec { int main() { int vlenb = 0; asm ("csrr %0,vlenb" : "=r" (vlenb) : : ); if (vlenb == 64) - return 1; - return 0; + return 0; + return 1; } } "-march=${gcc_march}"] }
[gcc r15-3830] RISC-V: testsuite: Fix SELECT_VL SLP fallout.
https://gcc.gnu.org/g:4bd3ccae58d40fad6bd99ed08ef4e1e4d70fefd0 commit r15-3830-g4bd3ccae58d40fad6bd99ed08ef4e1e4d70fefd0 Author: Robin Dapp Date: Thu Sep 19 05:08:47 2024 -0700 RISC-V: testsuite: Fix SELECT_VL SLP fallout. This fixes asm-scan fallout from r15-3712-g5e3a4a01785e2d where we allow SLP with SELECT_VL. Assisted by sed and regtested on rv64gcv_zvfh_zvbb. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: Expect length-controlled loop. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_
[gcc r15-5389] RISC-V: Add else operand to masked loads [PR115336].
https://gcc.gnu.org/g:b89273a049a76ffc29dd43a536ad329f0d994c05 commit r15-5389-gb89273a049a76ffc29dd43a536ad329f0d994c05 Author: Robin Dapp Date: Thu Aug 8 10:31:22 2024 +0200 RISC-V: Add else operand to masked loads [PR115336]. This patch adds else operands to masked loads. Currently the default else operand predicate just accepts "undefined" (i.e. SCRATCH) values. PR middle-end/115336 PR middle-end/116059 gcc/ChangeLog: * config/riscv/autovec.md: Add else operand. * config/riscv/predicates.md (maskload_else_operand): New predicate. * config/riscv/riscv-v.cc (get_else_operand): Remove static. (expand_load_store): Use get_else_operand and adjust index. (expand_gather_scatter): Ditto. (expand_lanes_load_store): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr115336.c: New test. * gcc.target/riscv/rvv/autovec/pr116059.c: New test. Diff: --- gcc/config/riscv/autovec.md| 50 +- gcc/config/riscv/predicates.md | 3 ++ gcc/config/riscv/riscv-v.cc| 30 - .../gcc.target/riscv/rvv/autovec/pr115336.c| 20 + .../gcc.target/riscv/rvv/autovec/pr116059.c| 15 +++ 5 files changed, 88 insertions(+), 30 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index b5fbe98b5fc5..c64ef5a12b43 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -26,8 +26,9 @@ [(match_operand:V 0 "register_operand") (match_operand:V 1 "memory_operand") (match_operand: 2 "vector_mask_operand") - (match_operand 3 "autovec_length_operand") - (match_operand 4 "const_0_operand")] + (match_operand:V 3 "maskload_else_operand") + (match_operand 4 "autovec_length_operand") + (match_operand 5 "const_0_operand")] "TARGET_VECTOR" { riscv_vector::expand_load_store (operands, true); @@ -57,8 +58,9 @@ (match_operand 3 "") (match_operand 4 "") (match_operand: 5 "vector_mask_operand") - (match_operand 6 "autovec_length_operand") - (match_operand 7 "const_0_operand")] + (match_operand 6 "maskload_else_operand") + (match_operand 7 "autovec_length_operand") 
+ (match_operand 8 "const_0_operand")] "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p (mode)" { riscv_vector::expand_gather_scatter (operands, true); @@ -72,8 +74,9 @@ (match_operand 3 "") (match_operand 4 "") (match_operand: 5 "vector_mask_operand") - (match_operand 6 "autovec_length_operand") - (match_operand 7 "const_0_operand")] + (match_operand 6 "maskload_else_operand") + (match_operand 7 "autovec_length_operand") + (match_operand 8 "const_0_operand")] "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p (mode)" { riscv_vector::expand_gather_scatter (operands, true); @@ -87,8 +90,9 @@ (match_operand 3 "") (match_operand 4 "") (match_operand: 5 "vector_mask_operand") - (match_operand 6 "autovec_length_operand") - (match_operand 7 "const_0_operand")] + (match_operand 6 "maskload_else_operand") + (match_operand 7 "autovec_length_operand") + (match_operand 8 "const_0_operand")] "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p (mode)" { riscv_vector::expand_gather_scatter (operands, true); @@ -102,8 +106,9 @@ (match_operand 3 "") (match_operand 4 "") (match_operand: 5 "vector_mask_operand") - (match_operand 6 "autovec_length_operand") - (match_operand 7 "const_0_operand")] + (match_operand 6 "maskload_else_operand") + (match_operand 7 "autovec_length_operand") + (match_operand 8 "const_0_operand")] "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p (mode)" { riscv_vector::expand_gather_scatter (operands, true); @@ -117,8 +122,9 @@ (match_operand 3 "") (match_operand 4 "") (match_operand: 5 "vector_mask_operand") - (match_operand 6 "autovec_length_operand") - (match_operand 7 "const_0_operand")] + (match_operand 6 "maskload_else_operand") + (match_operand 7 "autovec_length_operand") + (match_operand 8 "const_0_operand")] "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p (mode)" { riscv_vector::expand_gather_scatter (operands, true); @@ -132,8 +138,9 @@ (match_operand 3 "") (match_operand 4 "") 
(match_operand: 5 "vector_mask_operand") - (match_operand 6 "autovec_length_operand") - (match_operand 7 "const_0_operand")] + (match_operand 6 "maskload_else_operand") + (match_operand 7 "autovec_length_operand") + (match_operand 8 "const_0_operand")] "TARGET_VECTOR && riscv_vector::gather_scatter_valid_offset_p (mode)" { riscv_vector::expand_gather_scatter (operands, true); @@ -151,8 +158,9 @@ (match_operand 3 "") (match_operand 4 "") (match_operand: 5 "vector_m
[gcc r15-5385] vect: Add maskload else value support.
https://gcc.gnu.org/g:634ae740f5a839df6567c58cfdcd32a3833c4626 commit r15-5385-g634ae740f5a839df6567c58cfdcd32a3833c4626 Author: Robin Dapp Date: Thu Aug 8 14:29:05 2024 +0200 vect: Add maskload else value support. This patch adds an else operand to vectorized masked load calls. The current implementation adds else-value arguments to the respective target-querying functions that are used to supply the vectorizer with the proper else value. We query the target for its supported else operand and use that for the maskload call. If necessary, i.e. if the mode has padding bits and if the else operand is nonzero, a VEC_COND enforcing a zero else value is emitted. gcc/ChangeLog: * optabs-query.cc (supports_vec_convert_optab_p): Return icode. (get_supported_else_val): Return supported else value for optab's operand at index. (supports_vec_gather_load_p): Add else argument. (supports_vec_scatter_store_p): Ditto. * optabs-query.h (supports_vec_gather_load_p): Ditto. (get_supported_else_val): Ditto. * optabs-tree.cc (target_supports_mask_load_store_p): Ditto. (can_vec_mask_load_store_p): Ditto. (target_supports_len_load_store_p): Ditto. (get_len_load_store_mode): Ditto. * optabs-tree.h (target_supports_mask_load_store_p): Ditto. (can_vec_mask_load_store_p): Ditto. * tree-vect-data-refs.cc (vect_lanes_optab_supported_p): Ditto. (vect_gather_scatter_fn_p): Ditto. (vect_check_gather_scatter): Ditto. (vect_load_lanes_supported): Ditto. * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Ditto. * tree-vect-slp.cc (vect_get_operand_map): Adjust indices for else operand. (vect_slp_analyze_node_operations): Skip undefined else operand. * tree-vect-stmts.cc (exist_non_indexing_operands_for_use_p): Add else operand handling. (vect_get_vec_defs_for_operand): Handle undefined else operand. (check_load_store_for_partial_vectors): Add else argument. (vect_truncate_gather_scatter_offset): Ditto. (vect_use_strided_gather_scatters_p): Ditto. (get_group_load_store_type): Ditto. 
(get_load_store_type): Ditto. (vect_get_mask_load_else): Ditto. (vect_get_else_val_from_tree): Ditto. (vect_build_one_gather_load_call): Add zero else operand. (vectorizable_load): Use else operand. * tree-vectorizer.h (vect_gather_scatter_fn_p): Add else argument. (vect_load_lanes_supported): Ditto. (vect_get_mask_load_else): Ditto. (vect_get_else_val_from_tree): Ditto. Diff: --- gcc/optabs-query.cc| 70 +++--- gcc/optabs-query.h | 3 +- gcc/optabs-tree.cc | 66 ++--- gcc/optabs-tree.h | 8 +- gcc/tree-vect-data-refs.cc | 74 +++--- gcc/tree-vect-patterns.cc | 12 +- gcc/tree-vect-slp.cc | 22 ++- gcc/tree-vect-stmts.cc | 326 + gcc/tree-vectorizer.h | 10 +- 9 files changed, 466 insertions(+), 125 deletions(-) diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc index cc52bc0f5ea7..c1f3558af920 100644 --- a/gcc/optabs-query.cc +++ b/gcc/optabs-query.cc @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3. If not see #include "rtl.h" #include "recog.h" #include "vec-perm-indices.h" +#include "internal-fn.h" +#include "memmodel.h" +#include "optabs.h" struct target_optabs default_target_optabs; struct target_optabs *this_fn_optabs = &default_target_optabs; @@ -672,34 +675,57 @@ lshift_cheap_p (bool speed_p) that mode, given that the second mode is always an integer vector. If MODE is VOIDmode, return true if OP supports any vector mode. */ -static bool -supports_vec_convert_optab_p (optab op, machine_mode mode) +static enum insn_code +supported_vec_convert_optab (optab op, machine_mode mode) { int start = mode == VOIDmode ? 0 : mode; int end = mode == VOIDmode ? 
MAX_MACHINE_MODE - 1 : mode; + enum insn_code icode = CODE_FOR_nothing; for (int i = start; i <= end; ++i) if (VECTOR_MODE_P ((machine_mode) i)) for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j) - if (convert_optab_handler (op, (machine_mode) i, - (machine_mode) j) != CODE_FOR_nothing) - return true; + { + if ((icode + = convert_optab_handler (op, (machine_mode) i, + (machine_mode) j)) != CODE_FOR_nothing) + return icode; + } - return false; + return icode; } /* If MODE is not VOIDmode, return true if vec_gather_load is available for that mode. If MODE is V
[gcc r15-5387] gcn: Add else operand to masked loads.
https://gcc.gnu.org/g:4a39addb4921ca1f7aa013835cd1351226e5e6b6 commit r15-5387-g4a39addb4921ca1f7aa013835cd1351226e5e6b6 Author: Robin Dapp Date: Thu Aug 8 10:31:05 2024 +0200 gcn: Add else operand to masked loads. This patch adds an undefined else operand to the masked loads. gcc/ChangeLog: * config/gcn/predicates.md (maskload_else_operand): New predicate. * config/gcn/gcn-valu.md: Use new predicate. Diff: --- gcc/config/gcn/gcn-valu.md | 23 +++ gcc/config/gcn/predicates.md | 2 ++ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index cb2f4a780355..ce7a68f0e2d3 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -3989,7 +3989,8 @@ (define_expand "maskloaddi" [(match_operand:V_MOV 0 "register_operand") (match_operand:V_MOV 1 "memory_operand") - (match_operand 2 "")] + (match_operand 2 "") + (match_operand:V_MOV 3 "maskload_else_operand")] "" { rtx exec = force_reg (DImode, operands[2]); @@ -3998,11 +3999,8 @@ rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); -/* Masked lanes are required to hold zero. */ -emit_move_insn (operands[0], gcn_vec_constant (mode, 0)); - emit_insn (gen_gather_expr_exec (operands[0], addr, as, v, - operands[0], exec)); + gcn_gen_undef (mode), exec)); DONE; }) @@ -4027,7 +4025,8 @@ (match_operand: 2 "register_operand") (match_operand 3 "immediate_operand") (match_operand:SI 4 "gcn_alu_operand") - (match_operand:DI 5 "")] + (match_operand:DI 5 "") + (match_operand:V_MOV 6 "maskload_else_operand")] "" { rtx exec = force_reg (DImode, operands[5]); @@ -4036,18 +4035,18 @@ operands[2], operands[4], INTVAL (operands[3]), exec); -/* Masked lanes are required to hold zero. 
*/ -emit_move_insn (operands[0], gcn_vec_constant (mode, 0)); - if (GET_MODE (addr) == mode) emit_insn (gen_gather_insn_1offset_exec (operands[0], addr, const0_rtx, const0_rtx, -const0_rtx, operands[0], -exec)); +gcn_gen_undef + (mode), +operands[0], exec)); else emit_insn (gen_gather_insn_2offsets_exec (operands[0], operands[1], addr, const0_rtx, - const0_rtx, const0_rtx, + const0_rtx, + gcn_gen_undef + (mode), operands[0], exec)); DONE; }) diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md index 3f59396a6498..21beeb586a44 100644 --- a/gcc/config/gcn/predicates.md +++ b/gcc/config/gcn/predicates.md @@ -228,3 +228,5 @@ return gcn_stepped_zero_int_parallel_p (op, 1); }) +(define_predicate "maskload_else_operand" + (match_operand 0 "scratch_operand"))
[gcc r15-5386] aarch64: Add masked-load else operands.
https://gcc.gnu.org/g:a166a6ccdc6c3d6532a24ba3a2057a177ce44752 commit r15-5386-ga166a6ccdc6c3d6532a24ba3a2057a177ce44752 Author: Robin Dapp Date: Thu Aug 8 10:30:58 2024 +0200 aarch64: Add masked-load else operands. This adds zero else operands to masked loads and their intrinsics. I needed to adjust more than initially thought because we rely on combine for several instructions and a change in a "base" pattern needs to propagate to all those. gcc/ChangeLog: * config/aarch64/aarch64-sve-builtins-base.cc: Add else handling. * config/aarch64/aarch64-sve-builtins.cc (function_expander::use_contiguous_load_insn): Ditto. * config/aarch64/aarch64-sve-builtins.h: Add else operand to contiguous load. * config/aarch64/aarch64-sve.md (@aarch64_load _): Split and add else operand. (@aarch64_load_): Ditto. (*aarch64_load__mov): Ditto. * config/aarch64/aarch64-sve2.md: Ditto. * config/aarch64/iterators.md: Remove unused iterators. * config/aarch64/predicates.md (aarch64_maskload_else_operand): Add zero else operand. 
Diff: --- gcc/config/aarch64/aarch64-sve-builtins-base.cc | 24 +++- gcc/config/aarch64/aarch64-sve-builtins.cc | 12 +- gcc/config/aarch64/aarch64-sve-builtins.h | 2 +- gcc/config/aarch64/aarch64-sve.md | 52 + gcc/config/aarch64/aarch64-sve2.md | 3 +- gcc/config/aarch64/iterators.md | 4 -- gcc/config/aarch64/predicates.md| 4 ++ 7 files changed, 77 insertions(+), 24 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 2117eceb6063..20820fb1985c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -1524,11 +1524,12 @@ public: gimple_seq stmts = NULL; tree pred = f.convert_pred (stmts, vectype, 0); tree base = f.fold_contiguous_base (stmts, vectype); +tree els = build_zero_cst (vectype); gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); -gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3, - base, cookie, pred); +gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 4, + base, cookie, pred, els); gimple_call_set_lhs (new_call, f.lhs); return new_call; } @@ -1542,7 +1543,7 @@ public: e.vector_mode (0), e.gp_mode (0)); else icode = code_for_aarch64 (UNSPEC_LD1_COUNT, e.tuple_mode (0)); -return e.use_contiguous_load_insn (icode); +return e.use_contiguous_load_insn (icode, true); } }; @@ -1555,10 +1556,10 @@ public: rtx expand (function_expander &e) const override { -insn_code icode = code_for_aarch64_load (UNSPEC_LD1_SVE, extend_rtx_code (), +insn_code icode = code_for_aarch64_load (extend_rtx_code (), e.vector_mode (0), e.memory_vector_mode ()); -return e.use_contiguous_load_insn (icode); +return e.use_contiguous_load_insn (icode, true); } }; @@ -1577,6 +1578,8 @@ public: e.prepare_gather_address_operands (1); /* Put the predicate last, as required by mask_gather_load_optab. */ e.rotate_inputs_left (0, 5); +/* Add the else operand. 
*/ +e.args.quick_push (CONST0_RTX (e.vector_mode (0))); machine_mode mem_mode = e.memory_vector_mode (); machine_mode int_mode = aarch64_sve_int_mode (mem_mode); insn_code icode = convert_optab_handler (mask_gather_load_optab, @@ -1600,6 +1603,8 @@ public: e.rotate_inputs_left (0, 5); /* Add a constant predicate for the extension rtx. */ e.args.quick_push (CONSTM1_RTX (VNx16BImode)); +/* Add the else operand. */ +e.args.quick_push (CONST0_RTX (e.vector_mode (1))); insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (), e.vector_mode (0), e.memory_vector_mode ()); @@ -1742,6 +1747,7 @@ public: /* Get the predicate and base pointer. */ gimple_seq stmts = NULL; tree pred = f.convert_pred (stmts, vectype, 0); +tree els = build_zero_cst (vectype); tree base = f.fold_contiguous_base (stmts, vectype); gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); @@ -1760,8 +1766,8 @@ public: /* Emit the load itself. */ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); -gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, -
[gcc r15-5384] tree-ifcvt: Add zero maskload else value.
https://gcc.gnu.org/g:6b6bd53619fd11bab3def8dee737711a7ee539ea commit r15-5384-g6b6bd53619fd11bab3def8dee737711a7ee539ea Author: Robin Dapp Date: Thu Aug 8 12:54:36 2024 +0200 tree-ifcvt: Add zero maskload else value. When predicating a load we implicitly assume that the else value is zero. This matters in case the loaded value is padded (like e.g. a Bool) and we must ensure that the padding bytes are zero on targets that don't implicitly zero inactive elements. A former version of this patch still had this handling in ifcvt but the latest version defers it to the vectorizer. gcc/ChangeLog: * tree-if-conv.cc (predicate_load_or_store): Add zero else operand and comment. Diff: --- gcc/tree-if-conv.cc | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index eb981642bae1..f1a1f8fd0d35 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -2555,9 +2555,17 @@ predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask) ref); if (TREE_CODE (lhs) == SSA_NAME) { + /* Get a zero else value. This might not be what a target actually uses +but we cannot be sure about which vector mode the vectorizer will +choose. Therefore, leave the decision whether we need to force the +inactive elements to zero to the vectorizer. */ + tree els = vect_get_mask_load_else (MASK_LOAD_ELSE_ZERO, + TREE_TYPE (lhs)); + new_stmt - = gimple_build_call_internal (IFN_MASK_LOAD, 3, addr, - ptr, mask); + = gimple_build_call_internal (IFN_MASK_LOAD, 4, addr, + ptr, mask, els); + gimple_call_set_lhs (new_stmt, lhs); gimple_set_vuse (new_stmt, gimple_vuse (stmt)); }
[gcc r15-5382] docs: Document maskload else operand and behavior.
https://gcc.gnu.org/g:5214ddb464aab6c98b6eb6a267dcc9952f030d2f commit r15-5382-g5214ddb464aab6c98b6eb6a267dcc9952f030d2f Author: Robin Dapp Date: Thu Aug 8 10:32:25 2024 +0200 docs: Document maskload else operand and behavior. This patch amends the documentation for masked loads (maskload, vec_mask_load_lanes, and mask_gather_load as well as their len counterparts) with an else operand. gcc/ChangeLog: * doc/md.texi: Document masked load else operand. Diff: --- gcc/doc/md.texi | 63 + 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 25ded86f0d14..c8f1424a0424 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5014,8 +5014,10 @@ This pattern is not allowed to @code{FAIL}. @item @samp{vec_mask_load_lanes@var{m}@var{n}} Like @samp{vec_load_lanes@var{m}@var{n}}, but takes an additional mask operand (operand 2) that specifies which elements of the destination -vectors should be loaded. Other elements of the destination -vectors are set to zero. The operation is equivalent to: +vectors should be loaded. Other elements of the destination vectors are +taken from operand 3, which is an else operand similar to the one in +@code{maskload}. +The operation is equivalent to: @smallexample int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n}); @@ -5025,7 +5027,7 @@ for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++) operand0[i][j] = operand1[j * c + i]; else for (i = 0; i < c; i++) - operand0[i][j] = 0; + operand0[i][j] = operand3[j]; @end smallexample This pattern is not allowed to @code{FAIL}. @@ -5033,16 +5035,20 @@ This pattern is not allowed to @code{FAIL}. @cindex @code{vec_mask_len_load_lanes@var{m}@var{n}} instruction pattern @item @samp{vec_mask_len_load_lanes@var{m}@var{n}} Like @samp{vec_load_lanes@var{m}@var{n}}, but takes an additional -mask operand (operand 2), length operand (operand 3) as well as bias operand (operand 4) -that specifies which elements of the destination vectors should be loaded. 
-Other elements of the destination vectors are undefined. The operation is equivalent to: +mask operand (operand 2), length operand (operand 4) as well as bias operand +(operand 5) that specifies which elements of the destination vectors should be +loaded. Other elements of the destination vectors are taken from operand 3, +which is an else operand similar to the one in @code{maskload}. +The operation is equivalent to: @smallexample int c = GET_MODE_SIZE (@var{m}) / GET_MODE_SIZE (@var{n}); -for (j = 0; j < operand3 + operand4; j++) - if (operand2[j]) -for (i = 0; i < c; i++) +for (j = 0; j < operand4 + operand5; j++) + for (i = 0; i < c; i++) +if (operand2[j]) operand0[i][j] = operand1[j * c + i]; +else + operand0[i][j] = operand3[j]; @end smallexample This pattern is not allowed to @code{FAIL}. @@ -5122,18 +5128,25 @@ address width. @cindex @code{mask_gather_load@var{m}@var{n}} instruction pattern @item @samp{mask_gather_load@var{m}@var{n}} Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand as -operand 5. Bit @var{i} of the mask is set if element @var{i} +operand 5. +Other elements of the destination vectors are taken from operand 6, +which is an else operand similar to the one in @code{maskload}. +Bit @var{i} of the mask is set if element @var{i} of the result should be loaded from memory and clear if element @var{i} -of the result should be set to zero. +of the result should be set to operand 6. @cindex @code{mask_len_gather_load@var{m}@var{n}} instruction pattern @item @samp{mask_len_gather_load@var{m}@var{n}} -Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand (operand 5), -a len operand (operand 6) as well as a bias operand (operand 7). Similar to mask_len_load, -the instruction loads at most (operand 6 + operand 7) elements from memory. 
+Like @samp{gather_load@var{m}@var{n}}, but takes an extra mask operand +(operand 5) and an else operand (operand 6) as well as a len operand +(operand 7) and a bias operand (operand 8). + +Similar to mask_len_load the instruction loads at +most (operand 7 + operand 8) elements from memory. Bit @var{i} of the mask is set if element @var{i} of the result should -be loaded from memory and clear if element @var{i} of the result should be undefined. -Mask elements @var{i} with @var{i} > (operand 6 + operand 7) are ignored. +be loaded from memory and clear if element @var{i} of the result should +be set to element @var{i} of operand 6. +Mask elements @var{i} with @var{i} > (operand 7 + operand 8) are ignored. @cindex @code{mask_len_strided_load@var{m}} instruction pattern @item @samp{mask_len_strided_load@var{m}} @@ -5392,8 +5405,13 @@ Operands 4 and 5 have a target-dependent scalar integer mode. @cindex @code{maskload@var{m}@var{n}} instruction pattern @item @samp{maskload@v
[gcc r15-5390] RISC-V: Add VLS modes to strided loads.
https://gcc.gnu.org/g:52a392b8b797d01a7b0b06c8f20b0bf8374d489e commit r15-5390-g52a392b8b797d01a7b0b06c8f20b0bf8374d489e Author: Robin Dapp Date: Mon Nov 4 15:34:50 2024 +0100 RISC-V: Add VLS modes to strided loads. This patch adds VLS modes to the strided load expanders. gcc/ChangeLog: * config/riscv/autovec.md: Add VLS modes. * config/riscv/vector-iterators.md: Ditto. * config/riscv/vector.md: Ditto. Diff: --- gcc/config/riscv/autovec.md | 4 +- gcc/config/riscv/vector-iterators.md | 243 +++ gcc/config/riscv/vector.md | 22 ++-- 3 files changed, 256 insertions(+), 13 deletions(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index c64ef5a12b43..2529dc77f221 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2903,7 +2903,7 @@ ;; == Strided Load/Store ;; = (define_expand "mask_len_strided_load_" - [(match_operand:V 0 "register_operand") + [(match_operand:V_VLS 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand 2 "pmode_reg_or_0_operand") (match_operand: 3 "vector_mask_operand") @@ -2919,7 +2919,7 @@ (define_expand "mask_len_strided_store_" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand 1 "pmode_reg_or_0_operand") - (match_operand:V 2 "register_operand") + (match_operand:V_VLS 2 "register_operand") (match_operand: 3 "vector_mask_operand") (match_operand 4 "autovec_length_operand") (match_operand 5 "const_0_operand")] diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 43325d1ba87a..6a621459cc4a 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -3524,6 +3524,87 @@ (RVVM8DF "vector_eew64_stride_operand") (RVVM4DF "vector_eew64_stride_operand") (RVVM2DF "vector_eew64_stride_operand") (RVVM1DF "vector_eew64_stride_operand") + + (V1QI "vector_eew8_stride_operand") + (V2QI "vector_eew8_stride_operand") + (V4QI "vector_eew8_stride_operand") + (V8QI "vector_eew8_stride_operand") + (V16QI 
"vector_eew8_stride_operand") + (V32QI "vector_eew8_stride_operand") + (V64QI "vector_eew8_stride_operand") + (V128QI "vector_eew8_stride_operand") + (V256QI "vector_eew8_stride_operand") + (V512QI "vector_eew8_stride_operand") + (V1024QI "vector_eew8_stride_operand") + (V2048QI "vector_eew8_stride_operand") + (V4096QI "vector_eew8_stride_operand") + (V1HI "vector_eew16_stride_operand") + (V2HI "vector_eew16_stride_operand") + (V4HI "vector_eew16_stride_operand") + (V8HI "vector_eew16_stride_operand") + (V16HI "vector_eew16_stride_operand") + (V32HI "vector_eew16_stride_operand") + (V64HI "vector_eew16_stride_operand") + (V128HI "vector_eew16_stride_operand") + (V256HI "vector_eew16_stride_operand") + (V512HI "vector_eew16_stride_operand") + (V1024HI "vector_eew16_stride_operand") + (V2048HI "vector_eew16_stride_operand") + (V1SI "vector_eew32_stride_operand") + (V2SI "vector_eew32_stride_operand") + (V4SI "vector_eew32_stride_operand") + (V8SI "vector_eew32_stride_operand") + (V16SI "vector_eew32_stride_operand") + (V32SI "vector_eew32_stride_operand") + (V64SI "vector_eew32_stride_operand") + (V128SI "vector_eew32_stride_operand") + (V256SI "vector_eew32_stride_operand") + (V512SI "vector_eew32_stride_operand") + (V1024SI "vector_eew32_stride_operand") + (V1DI "vector_eew64_stride_operand") + (V2DI "vector_eew64_stride_operand") + (V4DI "vector_eew64_stride_operand") + (V8DI "vector_eew64_stride_operand") + (V16DI "vector_eew64_stride_operand") + (V32DI "vector_eew64_stride_operand") + (V64DI "vector_eew64_stride_operand") + (V128DI "vector_eew64_stride_operand") + (V256DI "vector_eew64_stride_operand") + (V512DI "vector_eew64_stride_operand") + + (V1HF "vector_eew16_stride_operand") + (V2HF "vector_eew16_stride_operand") + (V4HF "vector_eew16_stride_operand") + (V8HF "vector_eew16_stride_operand") + (V16HF "vector_eew16_stride_operand") + (V32HF "vector_eew16_stride_operand") + (V64HF "vector_eew16_stride_operand") + (V128HF "vector_eew16_stride_operand") + 
(V256HF "vector_eew16_stride_operand") + (V512HF "vector_eew16_stride_operand") + (V1024HF "vector_eew16_stride_operand") + (V2048HF "vector_eew16_stride_operand") + (V1SF "vector_eew32_stride_operand") + (V2SF "vector_eew32_stride_operand") + (V4SF "vector_eew32_stride_operand") + (V8SF "vector_eew32_stride_operand") + (V16SF "vector_eew32_stride_operand") + (V32SF "vector_eew32_stride_operand") + (V64SF "vector_eew32_stride_operand") + (V128SF "vector_eew32_stride_operand") + (V256SF "vector_eew32_stride_operand") + (V512SF "vector_eew32_stride_operand") + (V1024SF "vector_eew32_stride_operand") + (V1DF "vector_eew64_stride_ope
[gcc r15-5388] i386: Add zero maskload else operand.
https://gcc.gnu.org/g:ebf30772415cfd3fa544fc7262b28b948591538f commit r15-5388-gebf30772415cfd3fa544fc7262b28b948591538f Author: Robin Dapp Date: Tue Nov 5 14:47:07 2024 +0100 i386: Add zero maskload else operand. gcc/ChangeLog: * config/i386/sse.md (maskload): Call maskload..._1. (maskload_1): Rename. Diff: --- gcc/config/i386/sse.md | 21 ++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index efe32e5149fc..72acd5bde5e4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -28650,7 +28650,7 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "")]) -(define_expand "maskload" +(define_expand "maskload_1" [(set (match_operand:V48_128_256 0 "register_operand") (unspec:V48_128_256 [(match_operand: 2 "register_operand") @@ -28658,13 +28658,28 @@ UNSPEC_MASKMOV))] "TARGET_AVX") +(define_expand "maskload" + [(set (match_operand:V48_128_256 0 "register_operand") + (unspec:V48_128_256 + [(match_operand: 2 "register_operand") + (match_operand:V48_128_256 1 "memory_operand") + (match_operand:V48_128_256 3 "const0_operand")] + UNSPEC_MASKMOV))] + "TARGET_AVX" +{ + emit_insn (gen_maskload_1 (operands[0], + operands[1], + operands[2])); + DONE; +}) + (define_expand "maskload" [(set (match_operand:V48_AVX512VL 0 "register_operand") (vec_merge:V48_AVX512VL (unspec:V48_AVX512VL [(match_operand:V48_AVX512VL 1 "memory_operand")] UNSPEC_MASKLOAD) - (match_dup 0) + (match_operand:V48_AVX512VL 3 "const0_operand") (match_operand: 2 "register_operand")))] "TARGET_AVX512F") @@ -28674,7 +28689,7 @@ (unspec:VI12HFBF_AVX512VL [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")] UNSPEC_MASKLOAD) - (match_dup 0) + (match_operand:VI12HFBF_AVX512VL 3 "const0_operand") (match_operand: 2 "register_operand")))] "TARGET_AVX512BW")
[gcc r15-5383] ifn: Add else-operand handling.
https://gcc.gnu.org/g:8f68d9cb7897df188f7dcd733d8c385f77fd8011 commit r15-5383-g8f68d9cb7897df188f7dcd733d8c385f77fd8011 Author: Robin Dapp Date: Thu Aug 8 10:54:35 2024 +0200 ifn: Add else-operand handling. This patch adds else-operand handling to the internal functions. gcc/ChangeLog: * internal-fn.cc (add_mask_and_len_args): Rename... (add_mask_else_and_len_args): ...to this and add else handling. (expand_partial_load_optab_fn): Use adjusted function. (expand_partial_store_optab_fn): Ditto. (expand_scatter_store_optab_fn): Ditto. (expand_gather_load_optab_fn): Ditto. (internal_fn_len_index): Add else handling. (internal_fn_else_index): Ditto. (internal_fn_mask_index): Ditto. (get_supported_else_vals): New function. (supported_else_val_p): New function. (internal_gather_scatter_fn_supported_p): Add else operand. * internal-fn.h (internal_gather_scatter_fn_supported_p): Define else constants. (MASK_LOAD_ELSE_ZERO): Ditto. (MASK_LOAD_ELSE_M1): Ditto. (MASK_LOAD_ELSE_UNDEFINED): Ditto. (get_supported_else_vals): Declare. (supported_else_val_p): Ditto. Diff: --- gcc/internal-fn.cc | 148 + gcc/internal-fn.h | 13 - 2 files changed, 139 insertions(+), 22 deletions(-) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 0ee5f5bc7c55..c522a53a4334 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -333,17 +333,18 @@ get_multi_vector_move (tree array_type, convert_optab optab) return convert_optab_handler (optab, imode, vmode); } -/* Add mask and len arguments according to the STMT. */ +/* Add mask, else, and len arguments according to the STMT. */ static unsigned int -add_mask_and_len_args (expand_operand *ops, unsigned int opno, gcall *stmt) +add_mask_else_and_len_args (expand_operand *ops, unsigned int opno, gcall *stmt) { internal_fn ifn = gimple_call_internal_fn (stmt); int len_index = internal_fn_len_index (ifn); /* BIAS is always consecutive next of LEN. 
*/ int bias_index = len_index + 1; int mask_index = internal_fn_mask_index (ifn); - /* The order of arguments are always {len,bias,mask}. */ + + /* The order of arguments is always {mask, else, len, bias}. */ if (mask_index >= 0) { tree mask = gimple_call_arg (stmt, mask_index); @@ -365,6 +366,22 @@ add_mask_and_len_args (expand_operand *ops, unsigned int opno, gcall *stmt) create_input_operand (&ops[opno++], mask_rtx, TYPE_MODE (TREE_TYPE (mask))); } + + int els_index = internal_fn_else_index (ifn); + if (els_index >= 0) +{ + tree els = gimple_call_arg (stmt, els_index); + tree els_type = TREE_TYPE (els); + if (TREE_CODE (els) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (els) + && VAR_P (SSA_NAME_VAR (els))) + create_undefined_input_operand (&ops[opno++], TYPE_MODE (els_type)); + else + { + rtx els_rtx = expand_normal (els); + create_input_operand (&ops[opno++], els_rtx, TYPE_MODE (els_type)); + } +} if (len_index >= 0) { tree len = gimple_call_arg (stmt, len_index); @@ -3024,7 +3041,7 @@ static void expand_partial_load_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab) { int i = 0; - class expand_operand ops[5]; + class expand_operand ops[6]; tree type, lhs, rhs, maskt; rtx mem, target; insn_code icode; @@ -3054,7 +3071,7 @@ expand_partial_load_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab) target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); create_call_lhs_operand (&ops[i++], target, TYPE_MODE (type)); create_fixed_operand (&ops[i++], mem); - i = add_mask_and_len_args (ops, i, stmt); + i = add_mask_else_and_len_args (ops, i, stmt); expand_insn (icode, i, ops); assign_call_lhs (lhs, target, &ops[0]); @@ -3100,7 +3117,7 @@ expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab reg = expand_normal (rhs); create_fixed_operand (&ops[i++], mem); create_input_operand (&ops[i++], reg, TYPE_MODE (type)); - i = add_mask_and_len_args (ops, i, stmt); + i = add_mask_else_and_len_args (ops, i, stmt); expand_insn (icode, 
i, ops); } @@ -3686,7 +3703,7 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab) create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset))); create_integer_operand (&ops[i++], scale_int); create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs))); - i = add_mask_and_len_args (ops, i, stmt); + i = add_mask_else_and_len_args (ops, i, stmt); insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE (rhs)), TYPE
[gcc r15-5444] RISC-V: Load VLS perm indices directly from memory.
https://gcc.gnu.org/g:a18592e1c30f0f539c71fa632c49cb82008ec45a commit r15-5444-ga18592e1c30f0f539c71fa632c49cb82008ec45a Author: Robin Dapp Date: Thu Sep 26 11:56:08 2024 +0200 RISC-V: Load VLS perm indices directly from memory. Instead of loading the permutation indices and using vmslt in order to determine which elements belong to which source vector we can compute the proper mask at compile time. That way we can emit vlm instead of vle + vmslt. gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_merge_patterns): Load VLS indices directly. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls/merge-1.c: Check for vlm and no vmsleu etc. * gcc.target/riscv/rvv/autovec/vls/merge-2.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/merge-3.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/merge-4.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/merge-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/merge-6.c: Ditto. Diff: --- gcc/config/riscv/riscv-v.cc| 22 -- .../gcc.target/riscv/rvv/autovec/vls/merge-1.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vls/merge-2.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vls/merge-3.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vls/merge-4.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vls/merge-5.c | 2 ++ .../gcc.target/riscv/rvv/autovec/vls/merge-6.c | 2 ++ 7 files changed, 32 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index a0e22b6454b7..ee7a0128c0ed 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3101,9 +3101,27 @@ shuffle_merge_patterns (struct expand_vec_perm_d *d) machine_mode mask_mode = get_mask_mode (vmode); rtx mask = gen_reg_rtx (mask_mode); - if (indices_fit_selector_p) + if (indices_fit_selector_p && vec_len.is_constant ()) { - /* MASK = SELECTOR < NUNITS ? 1 : 0. */ + /* For a constant vector length we can generate the needed mask at +compile time and load it as mask at runtime. +This saves a compare at runtime. 
*/ + rtx_vector_builder sel (mask_mode, d->perm.encoding ().npatterns (), + d->perm.encoding ().nelts_per_pattern ()); + unsigned int encoded_nelts = sel.encoded_nelts (); + for (unsigned int i = 0; i < encoded_nelts; i++) + sel.quick_push (gen_int_mode (d->perm[i].to_constant () + < vec_len.to_constant (), + GET_MODE_INNER (mask_mode))); + mask = sel.build (); +} + else if (indices_fit_selector_p) +{ + /* For a dynamic vector length < 256 we keep the permutation +indices in the literal pool, load it at runtime and create the +mask by selecting either OP0 or OP1 by + + INDICES < NUNITS ? 1 : 0. */ rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); rtx x = gen_int_mode (vec_len, GET_MODE_INNER (sel_mode)); insn_code icode = code_for_pred_cmp_scalar (sel_mode); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c index cd24922d0ad4..c34734cff6d2 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c @@ -4,3 +4,5 @@ #include "../vls-vlmax/merge-1.c" /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */ +/* { dg-final { scan-assembler-not {\tvms} } } */ +/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c index 52d91244f51b..68f7b62e62ff 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c @@ -4,3 +4,5 @@ #include "../vls-vlmax/merge-2.c" /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */ +/* { dg-final { scan-assembler-not {\tvms} } } */ +/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c index 4931d2a36047..1250dca65d11 100644 --- 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c @@ -4,3 +4,5 @@ #include "../vls-vlmax/merge-3.c" /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */ +/* { dg-final { scan-assembler-not {\tvms} } } */ +/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c index f22a18f8ef3b..1dfd8287b7f2 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge
[gcc r15-5653] RISC-V: Ensure vtype for full-register moves [PR117544].
https://gcc.gnu.org/g:b82a5810e7bcc82b933e16f9067879b9d22b85c7 commit r15-5653-gb82a5810e7bcc82b933e16f9067879b9d22b85c7 Author: Robin Dapp Date: Thu Nov 21 14:49:53 2024 +0100 RISC-V: Ensure vtype for full-register moves [PR117544]. As discussed in PR117544 the VTYPE register is not preserved across function calls. Even though vmv1r-like instructions operate independently of the actual vtype they still require a valid vtype. As we cannot guarantee that the vtype is valid we must make sure to emit a vsetvl between a function call and a vmv1r.v. This patch makes the necessary changes by splitting the full-reg-move insns into patterns that use the vtype register and adding vmov to the types of instructions requiring a vset. PR target/117544 gcc/ChangeLog: * config/riscv/vector.md (*mov_whole): Split. (*mov_fract): Ditto. (*mov): Ditto. (*mov_vls): Ditto. (*mov_reg_whole_vtype): New pattern with vtype use. (*mov_fract_vtype): Ditto. (*mov_vtype): Ditto. (*mov_vls_vtype): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/abi-call-args-4.c: Expect vsetvl. * gcc.target/riscv/rvv/base/pr117544.c: New test. Diff: --- gcc/config/riscv/vector.md | 91 -- .../gcc.target/riscv/rvv/base/abi-call-args-4.c| 1 + gcc/testsuite/gcc.target/riscv/rvv/base/pr117544.c | 14 3 files changed, 99 insertions(+), 7 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 02cbd2f56f19..57e3c34c1c5a 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -42,7 +42,8 @@ (cond [(eq_attr "type" "vlde,vste,vldm,vstm,vlds,vsts,\ vldux,vldox,vstux,vstox,vldff,\ vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,\ - vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov,\ + vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge, + vmov,vimov,\ vsalu,vaalu,vsmul,vsshift,vnclip,\ vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,\ vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,\ @@ -1214,21 +1215,58 @@ ;; which is not the pattern we want. 
;; According the facts above, we make "*mov_whole" includes load/store/move for whole ;; vector modes according to '-march' and "*mov_fract" only include fractional vector modes. -(define_insn "*mov_whole" +(define_insn_and_split "*mov_whole" [(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr, m,vr") (match_operand:V_WHOLE 1 "reg_or_mem_operand" " m,vr,vr"))] "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "@ vl%m1re.v\t%0,%1 vs%m1r.v\t%1,%0 - vmv%m1r.v\t%0,%1" + #" + "&& !memory_operand (operands[0], mode) + && !memory_operand (operands[1], mode)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (reg:SI VTYPE_REGNUM))])] + "" [(set_attr "type" "vldr,vstr,vmov") (set_attr "mode" "")]) -(define_insn "*mov_fract" +;; Full-register moves like vmv1r.v require a valid vtype. +;; The ABI does not guarantee that the vtype is valid after a function +;; call so we need to make it dependent on the vtype and have +;; the vsetvl pass insert a vsetvl if necessary. +;; To facilitate optimization we keep the reg-reg move patterns "regular" +;; until split time and only then switch to a pattern like below that +;; uses the vtype register. +;; As the use of these patterns is limited (in the general context) +;; there is no need for helper functions and we can just create the RTX +;; directly. 
+(define_insn "*mov_reg_whole_vtype" + [(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr") + (match_operand:V_WHOLE 1 "reg_or_mem_operand" " vr")) + (use (reg:SI VTYPE_REGNUM))] + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" + "vmv%m1r.v\t%0,%1" + [(set_attr "type" "vmov") + (set_attr "mode" "")]) + +(define_insn_and_split "*mov_fract" [(set (match_operand:V_FRACT 0 "register_operand" "=vr") (match_operand:V_FRACT 1 "register_operand" " vr"))] "TARGET_VECTOR" + "#" + "&& 1" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (reg:SI VTYPE_REGNUM))])] + "" + [(set_attr "type" "vmov") + (set_attr "mode" "")]) + +(define_insn "*mov_fract_vtype" + [(set (match_operand:V_FRACT 0 "register_operand" "=vr") + (match_operand:V_FRACT 1 "register_operand" " vr")) + (use (reg:SI VTYPE_REGNUM))] + "TARGET_VECTOR" "vmv1r.v\t%0,%1" [(set_attr "type" "vmov") (set_attr "mode" "")]) @@ -1249,10 +1287,23 @@ DONE; }) -(define_insn "*mov" +(define_insn_and_split "*mov" [(set (match_operand:VB 0 "register_operand" "=vr") (match_operand:VB 1 "register_operand" " vr"))]
[gcc r15-5652] genemit: Distribute evenly to files [PR111600].
https://gcc.gnu.org/g:2e6b3308af6ddf87925321ddd2d387bfd352e410 commit r15-5652-g2e6b3308af6ddf87925321ddd2d387bfd352e410 Author: Robin Dapp Date: Thu Nov 21 15:34:37 2024 +0100 genemit: Distribute evenly to files [PR111600]. Currently we distribute insn patterns in genemit, partitioning them by the number of patterns per file. The first 100 into file 1, the next 100 into file 2, and so on. Depending on the patterns this can lead to files of very uneven sizes. Similar to the genmatch split, this patch introduces a dynamic choose_output () which considers the size of the output files and selects the shortest one for the next pattern. gcc/ChangeLog: PR target/111600 * genemit.cc (handle_arg): Use files instead of filenames. (main): Ditto. * gensupport.cc (SIZED_BASED_CHUNKS): Define. (choose_output): New function. * gensupport.h (choose_output): Declare. Diff: --- gcc/genemit.cc| 54 +- gcc/gensupport.cc | 33 + gcc/gensupport.h | 1 + 3 files changed, 51 insertions(+), 37 deletions(-) diff --git a/gcc/genemit.cc b/gcc/genemit.cc index 5d3d10f5061a..ee2f06cb7c2b 100644 --- a/gcc/genemit.cc +++ b/gcc/genemit.cc @@ -905,14 +905,15 @@ from the machine description file `md'. */\n\n"); fprintf (file, "#include \"target.h\"\n\n"); } -auto_vec output_files; +auto_vec output_files; static bool handle_arg (const char *arg) { if (arg[1] == 'O') { - output_files.safe_push (&arg[2]); + FILE *file = fopen (&arg[2], "w"); + output_files.safe_push (file); return true; } return false; @@ -933,47 +934,21 @@ main (int argc, const char **argv) /* Assign sequential codes to all entries in the machine description in parallel with the tables in insn-output.cc. 
*/ - int npatterns = count_patterns (); md_rtx_info info; - bool to_stdout = false; - int npatterns_per_file = npatterns; - if (!output_files.is_empty ()) -npatterns_per_file = npatterns / output_files.length () + 1; - else -to_stdout = true; - - gcc_assert (npatterns_per_file > 1); + if (output_files.is_empty ()) +output_files.safe_push (stdout); - /* Reverse so we can pop the first-added element. */ - output_files.reverse (); + for (auto f : output_files) +print_header (f); - int count = 0; FILE *file = NULL; + unsigned file_idx; /* Read the machine description. */ while (read_md_rtx (&info)) { - if (count == 0 || count == npatterns_per_file) - { - bool is_last = !to_stdout && output_files.is_empty (); - if (file && !is_last) - if (fclose (file) != 0) - return FATAL_EXIT_CODE; - - if (!output_files.is_empty ()) - { - const char *const filename = output_files.pop (); - file = fopen (filename, "w"); - } - else if (to_stdout) - file = stdout; - else - break; - - print_header (file); - count = 0; - } + file = choose_output (output_files, file_idx); switch (GET_CODE (info.def)) { @@ -999,10 +974,10 @@ main (int argc, const char **argv) default: break; } - - count++; } + file = choose_output (output_files, file_idx); + /* Write out the routines to add CLOBBERs to a pattern and say whether they clobber a hard reg. */ output_add_clobbers (&info, file); @@ -1015,5 +990,10 @@ main (int argc, const char **argv) handle_overloaded_gen (oname, file); } - return (fclose (file) != 0 ? FATAL_EXIT_CODE : SUCCESS_EXIT_CODE); + int ret = SUCCESS_EXIT_CODE; + for (FILE *f : output_files) +if (fclose (f) != 0) + ret = FATAL_EXIT_CODE; + + return ret; } diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc index 3a02132c8761..e0adf0c1bc54 100644 --- a/gcc/gensupport.cc +++ b/gcc/gensupport.cc @@ -3913,3 +3913,36 @@ find_optab (optab_pattern *p, const char *name) } return false; } + +/* Find the file to write into next. We try to evenly distribute the contents + over the different files. 
*/ + +#define SIZED_BASED_CHUNKS 1 + +FILE * +choose_output (const vec &parts, unsigned &idx) +{ + if (parts.length () == 0) +gcc_unreachable (); +#ifdef SIZED_BASED_CHUNKS + FILE *shortest = NULL; + long min = 0; + idx = 0; + for (unsigned i = 0; i < parts.length (); i++) +{ + FILE *part = parts[i]; + long len = ftell (part); + if (!shortest || min > len) + { + shortest = part; + min = len; + idx = i; + } +} + return shortest; +#else + static int current_file; + idx = current_file++ % parts.length (); + return parts[idx]; +#endif +} diff --git a/gcc/gensupport.h b/gcc/gensupport.h index b7a1da34518c..781c9e9ffcea 100644 --- a/gcc/g
[gcc r15-5673] RISC-V: avlprop: Do not propagate VL from slidedown.
https://gcc.gnu.org/g:9c82afd42e7b5c3bdb849c66879138e59d8eb866 commit r15-5673-g9c82afd42e7b5c3bdb849c66879138e59d8eb866 Author: Robin Dapp Date: Mon Nov 25 12:40:53 2024 +0100 RISC-V: avlprop: Do not propagate VL from slidedown. In the following situation (found in the rvv/autovec/vls-vlmax/shuffle-slide.c test which is not yet pushed) vsetivli zero,4,e8,mf4,ta,ma vle8.v v2,0(a1) # (1) vle8.v v1,0(a2) # (2) vsetivli zero,2,e8,mf4,tu,ma vslidedown.vi v1,v2,2 vsetivli zero,4,e8,mf4,ta,ma vse8.v v1,0(a2) we wrongly "propagate" VL=2 from vslidedown into the load. Although we check whether the "target" instruction has a merge operand the check only handles cases where the merge operand itself is loaded, like (2) in the snippet above. For (1) we load the non-merged operand, assume propagation is valid and continue despite (2). This patch just re-uses avl_can_be_propagated_p in order to disable slides altogether in such situations. gcc/ChangeLog: * config/riscv/riscv-avlprop.cc (pass_avlprop::get_vlmax_ta_preferred_avl): Check whether the use insn is valid for propagation. Diff: --- gcc/config/riscv/riscv-avlprop.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc index 91d80aa00d62..62491f9be2d4 100644 --- a/gcc/config/riscv/riscv-avlprop.cc +++ b/gcc/config/riscv/riscv-avlprop.cc @@ -351,7 +351,8 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const if (!use_insn->can_be_optimized () || use_insn->is_asm () || use_insn->is_call () || use_insn->has_volatile_refs () || use_insn->has_pre_post_modify () - || !has_vl_op (use_insn->rtl ())) + || !has_vl_op (use_insn->rtl ()) + || !avl_can_be_propagated_p (use_insn->rtl ())) return NULL_RTX; /* We should only propagate non-VLMAX AVL into VLMAX insn when
[gcc r15-6278] RISC-V: Fix compress shuffle pattern [PR117383].
https://gcc.gnu.org/g:ec870d3b5f378172006104bad674d7875463da18 commit r15-6278-gec870d3b5f378172006104bad674d7875463da18 Author: Robin Dapp Date: Wed Dec 11 20:48:30 2024 +0100 RISC-V: Fix compress shuffle pattern [PR117383]. This patch makes vcompress use the tail-undisturbed policy by default and also uses the proper VL. PR target/117383 gcc/ChangeLog: * config/riscv/riscv-protos.h (enum insn_type): Use TU policy. * config/riscv/riscv-v.cc (shuffle_compress_patterns): Set VL. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c: Expect tu. * gcc.target/riscv/rvv/autovec/pr117383.c: New test. Diff: --- gcc/config/riscv/riscv-protos.h| 4 +- gcc/config/riscv/riscv-v.cc| 3 +- .../riscv/rvv/autovec/binop/vcompress-avlprop-1.c | 2 +- .../gcc.target/riscv/rvv/autovec/pr117383.c| 48 ++ 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 98af41c6e742..e36309bd7288 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -509,9 +509,9 @@ enum insn_type : unsigned int /* For vcompress.vm */ COMPRESS_OP = __NORMAL_OP_TA2 | BINARY_OP_P, - /* has merge operand but use ta. */ + /* has merge operand but use tu. */ COMPRESS_OP_MERGE - = HAS_DEST_P | HAS_MERGE_P | TDEFAULT_POLICY_P | BINARY_OP_P, + = HAS_DEST_P | HAS_MERGE_P | TU_POLICY_P | BINARY_OP_P, /* For vslideup.up has merge operand but use ta. 
*/ SLIDEUP_OP_MERGE = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 5fcdaca1fd0f..417c36a7587c 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3393,7 +3393,8 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d) insn_code icode = code_for_pred_compress (vmode); rtx ops[] = {d->target, merge, d->op0, mask}; - emit_vlmax_insn (icode, COMPRESS_OP_MERGE, ops); + emit_nonvlmax_insn (icode, COMPRESS_OP_MERGE, ops, + gen_int_mode (vlen, Pmode)); return true; } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c index 3654b03e8ed6..98e53b38f094 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c @@ -11,7 +11,7 @@ struct s sss[MAX]; /* ** build_linked_list: ** ... -** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*ta,\s*ma +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*tu,\s*ma ** ... ** vcompress\.vm\s+v[0-9]+,\s*v[0-9]+,\s*v0 ** ... 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117383.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117383.c new file mode 100644 index ..c01612f29028 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117383.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-require-effective-target "riscv_v_ok" } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-additional-options "-std=c99 -mrvv-vector-bits=zvl" } */ + +typedef signed char int8_t; +typedef int8_t vnx64i __attribute__ ((vector_size (64))); + +#define MASK_64 \ + 1, 2, 3, 5, 7, 9, 10, 11, 12, 14, 15, 17, 19, 21, 22, 23, 26, 28, 30, 31, \ +37, 38, 41, 46, 47, 53, 54, 55, 60, 61, 62, 63, 76, 77, 78, 79, 80, 81, \ +82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, \ +100, 101, 102, 103, 104, 105, 106, 107 + +void __attribute__ ((noipa)) +test_1 (int8_t *x, int8_t *y, int8_t *out) +{ + vnx64i v1 = *(vnx64i *) x; + vnx64i v2 = *(vnx64i *) y; + vnx64i v3 = __builtin_shufflevector (v1, v2, MASK_64); + *(vnx64i *) out = v3; +} + +int +main (void) +{ + int8_t x[64]; + int8_t y[64]; + int8_t out[64]; + + for (int i = 0; i < 64; i++) +{ + x[i] = -i; + y[i] = i; +} + + test_1 (x, y, out); + + int mask[] = {MASK_64}; +#pragma GCC novector + for (int i = 0; i < 64; i++) +{ + int idx = mask[i] < 64 ? mask[i] : mask[i] - 64; + int ref = mask[i] < 64 ? x[idx] : y[idx]; + if (ref != out[i]) +__builtin_abort (); +} +}
[gcc r15-6277] RISC-V: Increase cost for vec_construct [PR118019].
https://gcc.gnu.org/g:ce199a952bfef3e27354a4586a17bc55274c1d3c commit r15-6277-gce199a952bfef3e27354a4586a17bc55274c1d3c Author: Robin Dapp Date: Fri Dec 13 11:23:03 2024 +0100 RISC-V: Increase cost for vec_construct [PR118019]. For a generic vec_construct from scalar elements we need to load each scalar element and move it over to a vector register. Right now we only use a cost of 1 per element. This patch uses register-move cost as well as scalar_to_vec and multiplies it with the number of elements in the vector instead. PR target/118019 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_builtin_vectorization_cost): Increase vec_construct cost. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr118019.c: New test. Diff: --- gcc/config/riscv/riscv.cc | 8 +++- .../gcc.target/riscv/rvv/autovec/pr118019.c| 52 ++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index be2ebf9d9c09..aa8a4562d9af 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -12263,7 +12263,13 @@ riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost; case vec_construct: - return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); + { + /* TODO: This is too pessimistic in case we can splat. */ + int regmove_cost = fp ? 
costs->regmove->FR2VR + : costs->regmove->GR2VR; + return (regmove_cost + common_costs->scalar_to_vec_cost) + * estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); + } default: gcc_unreachable (); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c new file mode 100644 index ..02b3ab44e7cb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118019.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv_zvl512b -mabi=lp64d -mstrict-align -mvector-strict-align" } */ + +/* Make sure we do not construct the vector element-wise despite + slow misaligned scalar and vector accesses. */ + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + +#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) \ + { \ +int t0 = s0 + s1; \ +int t1 = s0 - s1; \ +int t2 = s2 + s3; \ +int t3 = s2 - s3; \ +d0 = t0 + t2; \ +d2 = t0 - t2; \ +d1 = t1 + t3; \ +d3 = t1 - t3; \ + } + +uint32_t +abs2 (uint32_t a) +{ + uint32_t s = ((a >> 15) & 0x10001) * 0xffff; + return (a + s) ^ s; +} + +int +x264_pixel_satd_8x4 (uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2) +{ + uint32_t tmp[4][4]; + uint32_t a0, a1, a2, a3; + int sum = 0; + for (int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2) +{ + a0 = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16); + a1 = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16); + a2 = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16); + a3 = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16); + HADAMARD4 (tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3); +} + for (int i = 0; i < 4; i++) +{ + HADAMARD4 (a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]); + sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3); +} + return (((uint16_t) sum) + ((uint32_t) sum >> 16)) >> 1; +} + +/* { dg-final { scan-assembler-not "lbu" } } */
[gcc r15-6279] vect: Do not try to duplicate_and_interleave one-element mode.
https://gcc.gnu.org/g:99eef0cfa56573c32b9c0a1e43519ee4300ac63f commit r15-6279-g99eef0cfa56573c32b9c0a1e43519ee4300ac63f Author: Robin Dapp Date: Fri Sep 6 16:04:03 2024 +0200 vect: Do not try to duplicate_and_interleave one-element mode. PR112694 shows that we try to create sub-vectors of single-element vectors because can_duplicate_and_interleave_p returns true. The problem resurfaced in PR116611. This patch makes can_duplicate_and_interleave_p return false if count / nvectors > 0 and removes the corresponding check in the riscv backend. This partially gets rid of the FAIL in slp-19a.c. At least when built with cost model we don't have LOAD_LANES anymore. Without cost model, as in the test suite, we choose a different path and still end up with LOAD_LANES. Bootstrapped and regtested on x86 and power10, regtested on rv64gcv_zvfh_zvbb. Still waiting for the aarch64 results. Regards Robin gcc/ChangeLog: PR target/112694 PR target/116611. * config/riscv/riscv-v.cc (expand_vec_perm_const): Remove early return. * tree-vect-slp.cc (can_duplicate_and_interleave_p): Return false when we cannot create sub-elements. Diff: --- gcc/config/riscv/riscv-v.cc | 9 - gcc/tree-vect-slp.cc| 3 +++ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 417c36a7587c..b0de4c52b83c 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4011,15 +4011,6 @@ expand_vec_perm_const (machine_mode vmode, machine_mode op_mode, rtx target, mask to do the iteration loop control. Just disable it directly. */ if (GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL) return false; - /* FIXME: Explicitly disable VLA interleave SLP vectorization when we - may encounter ICE for poly size (1, 1) vectors in loop vectorizer. - Ideally, middle-end loop vectorizer should be able to disable it - itself, We can remove the codes here when middle-end code is able - to disable VLA SLP vectorization for poly size (1, 1) VF. 
*/ - if (!BYTES_PER_RISCV_VECTOR.is_constant () - && maybe_lt (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL, - poly_int64 (16, 16))) -return false; struct expand_vec_perm_d d; diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 9ad95104ec7d..7bad268d406a 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -490,6 +490,9 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, if (!multiple_p (elt_bytes, 2, &elt_bytes)) return false; nvectors *= 2; + /* We need to be able to fuse COUNT / NVECTORS elements together. */ + if (!multiple_p (count, nvectors)) + return false; } }
[gcc r15-6280] docs: Fix [us]abd pattern name.
https://gcc.gnu.org/g:65c09538420ed820a24aac982a0299226b879d91 commit r15-6280-g65c09538420ed820a24aac982a0299226b879d91 Author: Robin Dapp Date: Thu Dec 12 11:46:32 2024 +0100 docs: Fix [us]abd pattern name. The uabd and sabd optab name is missing a 3 suffix (for its three arguments). This patch adds it. gcc/ChangeLog: * doc/md.texi: Add "3" suffix. Diff: --- gcc/doc/md.texi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index f0b63a144ad2..523ce9bce17e 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -6208,8 +6208,8 @@ Other shift and rotate instructions, analogous to the Vector shift and rotate instructions that take vectors as operand 2 instead of a scalar type. -@cindex @code{uabd@var{m}} instruction pattern -@cindex @code{sabd@var{m}} instruction pattern +@cindex @code{uabd@var{m}3} instruction pattern +@cindex @code{sabd@var{m}3} instruction pattern @item @samp{uabd@var{m}}, @samp{sabd@var{m}} Signed and unsigned absolute difference instructions. These instructions find the difference between operands 1 and 2
[gcc r15-6212] RISC-V: Add interleave pattern.
https://gcc.gnu.org/g:cff3050a4fbec323629563b87c9a83bf3e7be908 commit r15-6212-gcff3050a4fbec323629563b87c9a83bf3e7be908 Author: Robin Dapp Date: Wed Oct 16 22:39:08 2024 +0200 RISC-V: Add interleave pattern. This patch adds efficient handling of interleaving patterns like [0 4 1 5] to vec_perm_const. It is implemented by a slideup and a gather. gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_interleave_patterns): New function. (expand_vec_perm_const_1): Use new function. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave-run.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave.c: New test. Diff: --- gcc/config/riscv/riscv-v.cc| 80 ++ .../rvv/autovec/vls-vlmax/shuffle-interleave-run.c | 122 + .../rvv/autovec/vls-vlmax/shuffle-interleave.c | 69 3 files changed, 271 insertions(+) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 42c4e7d0f9ec..d58632b0a095 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3494,6 +3494,84 @@ shuffle_slide_patterns (struct expand_vec_perm_d *d) return true; } +/* Recognize interleaving patterns like [0 4 1 5]. 
*/ + +static bool +shuffle_interleave_patterns (struct expand_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + machine_mode sel_mode = related_int_vector_mode (vmode).require (); + poly_int64 vec_len = d->perm.length (); + int n_patterns = d->perm.encoding ().npatterns (); + + if (!vec_len.is_constant ()) +return false; + + if (n_patterns != 2) +return false; + + unsigned vlen = vec_len.to_constant (); + + if (vlen < 4 || vlen > 64) +return false; + + if (d->one_vector_p) +return false; + + bool low = true; + if (d->perm.series_p (0, 2, 0, 1) + && d->perm.series_p (1, 2, vlen, 1)) +low = true; + else if (d->perm.series_p (0, 2, vlen / 2, 1) + && d->perm.series_p (1, 2, vlen + vlen / 2, 1)) +low = false; + else +return false; + + vec_perm_builder sel (vlen, 2, 1); + sel.safe_grow (vlen); + int cnt = 0; + for (unsigned i = 0; i < vlen; i += 2) +{ + sel[i] = cnt; + sel[i + 1] = cnt + vlen / 2; + cnt++; +} + + vec_perm_indices indices (sel, 2, vlen); + + if (vlen != indices.length ().to_constant ()) +return false; + + /* Success! */ + if (d->testing_p) +return true; + + int slide_cnt = vlen / 2; + rtx tmp = gen_reg_rtx (vmode); + + if (low) +{ + /* No need for a vector length because we slide up until the +end of OP1 anyway. */ + rtx ops[] = {tmp, d->op0, d->op1, gen_int_mode (slide_cnt, Pmode)}; + insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode); + emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops); +} + else +{ + rtx ops[] = {tmp, d->op1, d->op0, gen_int_mode (slide_cnt, Pmode)}; + insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, vmode); + emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops, + gen_int_mode (slide_cnt, Pmode)); +} + + rtx sel_rtx = vec_perm_indices_to_rtx (sel_mode, indices); + emit_vlmax_gather_insn (gen_lowpart (vmode, d->target), tmp, sel_rtx); + + return true; +} + /* Recognize decompress patterns: 1. 
VEC_PERM_EXPR op0 and op1 @@ -3810,6 +3888,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; if (shuffle_slide_patterns (d)) return true; + if (shuffle_interleave_patterns (d)) + return true; if (shuffle_compress_patterns (d)) return true; if (shuffle_decompress_patterns (d)) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave-run.c new file mode 100644 index ..57748d95362f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-interleave-run.c @@ -0,0 +1,122 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-add-options riscv_v } */ +/* { dg-additional-options "-O3 -mrvv-max-lmul=m8 -std=gnu99" } */ + +#include "shuffle-interleave.c" + +#define SERIES_2(x, y) (x), (x + 1) +#define SERIES_4(x, y) SERIES_2 (x, y), SERIES_2 (x + 2, y) +#define SERIES_8(x, y) SERIES_4 (x, y), SERIES_4 (x + 4, y) +#define SERIES_16(x, y) SERIES_8 (x, y), SERIES_8 (x + 8, y) +#define SERIES_32(x, y) SERIES_16 (x, y), SERIES_16 (x + 16, y) +#define SERIES_64(x, y) SERIES_32 (x, y), SERIES_32 (x + 32, y) + +#define comp(a, b, n) \ + for (unsigned i = 0; i < n; ++i) \ +if ((a)[i] != (b)[i]) \ + __builtin_abort
[gcc r15-6210] RISC-V: Emit vector shift pattern for const_vector [PR117353].
https://gcc.gnu.org/g:cfdab86f20f6e77d9c8bf982989f78ef975c7611 commit r15-6210-gcfdab86f20f6e77d9c8bf982989f78ef975c7611 Author: Robin Dapp Date: Thu Dec 12 10:33:28 2024 +0100 RISC-V: Emit vector shift pattern for const_vector [PR117353]. In PR117353 and PR117878 we expand a const vector during reload. For this we use an unpredicated left shift. Normally an insn like this is split but as we introduce it late and cannot create pseudos anymore it remains unpredicated and is not recognized by the vsetvl pass (where we expect all insns to be in predicated RVV format). This patch directly emits a predicated shift instead. We could distinguish between !lra_in_progress and lra_in_progress and emit an unpredicated shift in the former case but we're not very likely to optimize it anyway so it doesn't seem worth it. PR target/117353 PR target/117878 gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_const_vector): Use predicated instead of simple shift. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr117353.c: New test. Diff: --- gcc/config/riscv/riscv-v.cc| 8 +++--- .../gcc.target/riscv/rvv/autovec/pr117353.c| 29 ++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 47bc0255aa38..2530fd9c9799 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1439,9 +1439,11 @@ expand_const_vector (rtx target, rtx src) rtx shift_count = gen_int_mode (exact_log2 (builder.npatterns ()), builder.inner_mode ()); - rtx tmp1 = expand_simple_binop (builder.mode (), LSHIFTRT, -vid, shift_count, NULL_RTX, -false, OPTAB_DIRECT); + rtx tmp1 = gen_reg_rtx (builder.mode ()); + rtx shift_ops[] = {tmp1, vid, shift_count}; + emit_vlmax_insn (code_for_pred_scalar + (LSHIFTRT, builder.mode ()), BINARY_OP, + shift_ops); /* Step 3: Generate tmp2 = tmp1 * step. 
*/ rtx tmp2 = gen_reg_rtx (builder.mode ()); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c new file mode 100644 index ..135a00194c9d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr117353.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gcv_zvl256b -mabi=lp64d" } */ + +int *b; + +inline void c (char *d, int e) +{ + d[0] = 0; + d[1] = e; +} + +void f (); + +void h () +{ + for (;;) +{ + char *a; + long g = 8; + while (g) + { + c (a, *b); + b++; + a += 2; + g--; + } + f (); +} +}
[gcc r15-6211] RISC-V: Add slide to perm_const strategies.
https://gcc.gnu.org/g:71bfc8c33e63f4a566079d34ed3bc98f45133e96 commit r15-6211-g71bfc8c33e63f4a566079d34ed3bc98f45133e96 Author: Robin Dapp Date: Mon Sep 16 22:22:14 2024 +0200 RISC-V: Add slide to perm_const strategies. This patch adds a shuffle_slide_patterns to expand_vec_perm_const. It recognizes permutations like {0, 1, 4, 5} or {2, 3, 6, 7} which can be constructed by a slideup or slidedown of one of the vectors into the other one. gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_slide_patterns): New. (expand_vec_perm_const_1): Call new function. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide-run.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide.c: New test. Diff: --- gcc/config/riscv/riscv-v.cc| 99 .../rvv/autovec/vls-vlmax/shuffle-slide-run.c | 266 + .../riscv/rvv/autovec/vls-vlmax/shuffle-slide.c| 207 3 files changed, 572 insertions(+) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 2530fd9c9799..42c4e7d0f9ec 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3397,6 +3397,103 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d) return true; } +/* Recognize patterns like [4 5 6 7 12 13 14 15] where either the lower + or the higher parts of both vectors are combined into one. */ + +static bool +shuffle_slide_patterns (struct expand_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + poly_int64 vec_len = d->perm.length (); + + if (!vec_len.is_constant ()) +return false; + + int vlen = vec_len.to_constant (); + if (vlen < 4) +return false; + + if (d->one_vector_p) +return false; + + /* For a slideup OP0 can stay, for a slidedown OP1 can. + The former requires that the first element of the permutation + is the first element of OP0, the latter that the last permutation + element is the last element of OP1. */ + bool slideup = false; + bool slidedown = false; + + /* For a slideup the permutation must start at OP0's first element. 
*/ + if (known_eq (d->perm[0], 0)) +slideup = true; + + /* For a slidedown the permutation must end at OP1's last element. */ + if (known_eq (d->perm[vlen - 1], 2 * vlen - 1)) +slidedown = true; + + if (slideup && slidedown) +return false; + + if (!slideup && !slidedown) +return false; + + /* Check for a monotonic sequence with one pivot. */ + int pivot = -1; + for (int i = 0; i < vlen; i++) +{ + if (pivot == -1 && known_ge (d->perm[i], vec_len)) + pivot = i; + if (i > 0 && i != pivot + && maybe_ne (d->perm[i], d->perm[i - 1] + 1)) + return false; +} + + if (pivot == -1) +return false; + + /* For a slideup OP1's part (to be slid up) must be a low part, + i.e. starting with its first element. */ + if (slideup && maybe_ne (d->perm[pivot], vlen)) + return false; + + /* For a slidedown OP0's part (to be slid down) must be a high part, + i.e. ending with its last element. */ + if (slidedown && maybe_ne (d->perm[pivot - 1], vlen - 1)) +return false; + + /* Success! */ + if (d->testing_p) +return true; + + /* PIVOT is the start of the lower/higher part of OP1 or OP2. + For a slideup it indicates how many elements of OP1 to + skip/slide over. For a slidedown it indicates how long + OP1's high part is, while VLEN - PIVOT is the amount to slide. */ + int slide_cnt = slideup ? pivot : vlen - pivot; + insn_code icode; + if (slideup) +{ + /* No need for a vector length because we slide up until the +end of OP1 anyway. */ + rtx ops[] = {d->target, d->op0, d->op1, gen_int_mode (slide_cnt, Pmode)}; + icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode); + emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops); +} + else +{ + /* Here we need a length because we slide to the beginning of OP1 +leaving the remaining elements undisturbed. 
*/ + int len = pivot; + rtx ops[] = {d->target, d->op1, d->op0, + gen_int_mode (slide_cnt, Pmode)}; + icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, vmode); + emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops, + gen_int_mode (len, Pmode)); +} + + return true; +} + /* Recognize decompress patterns: 1. VEC_PERM_EXPR op0 and op1 @@ -3711,6 +3808,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; if (shuffle_consecutive_patterns (d)) return true; + if (shuffle_slide_patterns (d)) + return true; if (shuffle_compress_patterns (d)) return true; if (shuffle_decompress_patterns (d)) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-slide-run.
[gcc r15-6213] RISC-V: Add even/odd vec_perm_const pattern.
https://gcc.gnu.org/g:528567a7b1589735408eaa133206a0683162188e commit r15-6213-g528567a7b1589735408eaa133206a0683162188e Author: Robin Dapp Date: Thu Oct 17 11:33:19 2024 +0200 RISC-V: Add even/odd vec_perm_const pattern. This adds handling for even/odd patterns. gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_even_odd_patterns): New function. (expand_vec_perm_const_1): Use new function. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c: New test. Diff: --- gcc/config/riscv/riscv-v.cc| 66 +++ .../rvv/autovec/vls-vlmax/shuffle-evenodd-run.c| 122 + .../riscv/rvv/autovec/vls-vlmax/shuffle-evenodd.c | 68 3 files changed, 256 insertions(+) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index d58632b0a095..517a016270b8 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3572,6 +3572,70 @@ shuffle_interleave_patterns (struct expand_vec_perm_d *d) return true; } + +/* Recognize even/odd patterns like [0 2 4 6]. We use two compress + and one slideup. */ + +static bool +shuffle_even_odd_patterns (struct expand_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + poly_int64 vec_len = d->perm.length (); + int n_patterns = d->perm.encoding ().npatterns (); + + if (n_patterns != 1) +return false; + + if (!vec_len.is_constant ()) +return false; + + int vlen = vec_len.to_constant (); + if (vlen < 4 || vlen > 64) +return false; + + if (d->one_vector_p) +return false; + + bool even = true; + if (!d->perm.series_p (0, 1, 0, 2)) +{ + even = false; + if (!d->perm.series_p (0, 1, 1, 2)) + return false; +} + + /* Success! */ + if (d->testing_p) +return true; + + machine_mode mask_mode = get_mask_mode (vmode); + rvv_builder builder (mask_mode, vlen, 1); + int bit = even ? 
0 : 1; + for (int i = 0; i < vlen; i++) +{ + bit ^= 1; + if (bit) + builder.quick_push (CONST1_RTX (BImode)); + else + builder.quick_push (CONST0_RTX (BImode)); +} + rtx mask = force_reg (mask_mode, builder.build ()); + + insn_code icode = code_for_pred_compress (vmode); + rtx ops1[] = {d->target, d->op0, mask}; + emit_vlmax_insn (icode, COMPRESS_OP, ops1); + + rtx tmp2 = gen_reg_rtx (vmode); + rtx ops2[] = {tmp2, d->op1, mask}; + emit_vlmax_insn (icode, COMPRESS_OP, ops2); + + rtx ops[] = {d->target, d->target, tmp2, gen_int_mode (vlen / 2, Pmode)}; + icode = code_for_pred_slide (UNSPEC_VSLIDEUP, vmode); + emit_vlmax_insn (icode, SLIDEUP_OP_MERGE, ops); + + return true; +} + /* Recognize decompress patterns: 1. VEC_PERM_EXPR op0 and op1 @@ -3890,6 +3954,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; if (shuffle_interleave_patterns (d)) return true; + if (shuffle_even_odd_patterns (d)) + return true; if (shuffle_compress_patterns (d)) return true; if (shuffle_decompress_patterns (d)) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c new file mode 100644 index ..ff1ffa42ee1b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/shuffle-evenodd-run.c @@ -0,0 +1,122 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-add-options riscv_v } */ +/* { dg-additional-options "-O3 -mrvv-max-lmul=m8 -std=gnu99" } */ + +#include "shuffle-evenodd.c" + +#define SERIES_2(x, y) (x), (x + 1) +#define SERIES_4(x, y) SERIES_2 (x, y), SERIES_2 (x + 2, y) +#define SERIES_8(x, y) SERIES_4 (x, y), SERIES_4 (x + 4, y) +#define SERIES_16(x, y) SERIES_8 (x, y), SERIES_8 (x + 8, y) +#define SERIES_32(x, y) SERIES_16 (x, y), SERIES_16 (x + 16, y) +#define SERIES_64(x, y) SERIES_32 (x, y), SERIES_32 (x + 32, y) + +#define comp(a, b, n) \ + for (unsigned i = 0; i < n; ++i) \ +if ((a)[i] != (b)[i]) \ + 
__builtin_abort (); + +#define CHECK1(TYPE, NUNITS) \ + __attribute__ ((noipa)) void check1_##TYPE () \ + { \ +TYPE v0 = (TYPE){SERIES_##NUNITS (0, NUNITS)}; \ +TYPE v1 = (TYPE){SERIES_##NUNITS (NUNITS, NUNITS)}; \ +TYPE ref = (TYPE){MASKE_##NUNITS (0, NUNITS)}; \ +TYPE res;
[gcc r15-6214] RISC-V: Improve slide1up pattern.
https://gcc.gnu.org/g:12a5ab146110631edffcd307a0c10773160f2723 commit r15-6214-g12a5ab146110631edffcd307a0c10773160f2723 Author: Robin Dapp Date: Sat Nov 16 15:13:09 2024 +0100 RISC-V: Improve slide1up pattern. This patch adds a second variant to implement the extract/slide1up pattern. In order to do a permutation like <3, 4, 5, 6> from vectors <0, 1, 2, 3> and <4, 5, 6, 7> we currently extract <3> from the first vector and re-insert it into the second vector. Unless register-file crossing latency is essentially zero it should be preferable to first slide the second vector up by one, then slide down the first vector by (nunits - 1). gcc/ChangeLog: * config/riscv/riscv-protos.h (riscv_register_move_cost): Export. * config/riscv/riscv-v.cc (shuffle_extract_and_slide1up_patterns): Rename... (shuffle_off_by_one_patterns): ... to this and add slideup/slidedown variant. (expand_vec_perm_const_1): Call renamed function. * config/riscv/riscv.cc (riscv_secondary_memory_needed): Remove static. (riscv_register_move_cost): Add VR<->GR/FR handling. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr112599-2.c: Adjust test expectation. 
Diff: --- gcc/config/riscv/riscv-protos.h| 1 + gcc/config/riscv/riscv-v.cc| 52 -- gcc/config/riscv/riscv.cc | 18 +++- .../gcc.target/riscv/rvv/autovec/pr112599-2.c | 2 +- 4 files changed, 57 insertions(+), 16 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 99ee6ef1e0dc..98af41c6e742 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -139,6 +139,7 @@ extern void riscv_expand_ussub (rtx, rtx, rtx); extern void riscv_expand_sssub (rtx, rtx, rtx); extern void riscv_expand_ustrunc (rtx, rtx); extern void riscv_expand_sstrunc (rtx, rtx); +extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t); #ifdef RTX_CODE extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 517a016270b8..5fcdaca1fd0f 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3798,11 +3798,13 @@ shuffle_bswap_pattern (struct expand_vec_perm_d *d) return true; } -/* Recognize the pattern that can be shuffled by vec_extract and slide1up - approach. */ +/* Recognize patterns like [3 4 5 6] where we combine the last element + of the first vector and the first n - 1 elements of the second vector. + This can be implemented by slides or by extracting and re-inserting + (slide1up) the first vector's last element. */ static bool -shuffle_extract_and_slide1up_patterns (struct expand_vec_perm_d *d) +shuffle_off_by_one_patterns (struct expand_vec_perm_d *d) { poly_int64 nunits = GET_MODE_NUNITS (d->vmode); @@ -3820,17 +3822,39 @@ shuffle_extract_and_slide1up_patterns (struct expand_vec_perm_d *d) if (d->testing_p) return true; - /* Extract the last element of the first vector. 
*/ - scalar_mode smode = GET_MODE_INNER (d->vmode); - rtx tmp = gen_reg_rtx (smode); - emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode)); + int scalar_cost = riscv_register_move_cost (d->vmode, V_REGS, GR_REGS) ++ riscv_register_move_cost (d->vmode, GR_REGS, V_REGS) + 2; + int slide_cost = 2; + + if (slide_cost < scalar_cost) +{ + /* This variant should always be preferable because we just need two +slides. The extract-variant also requires two slides but additionally +pays the latency for register-file crossing. */ + rtx tmp = gen_reg_rtx (d->vmode); + rtx ops[] = {tmp, d->op1, gen_int_mode (1, Pmode)}; + insn_code icode = code_for_pred_slide (UNSPEC_VSLIDEUP, d->vmode); + emit_vlmax_insn (icode, BINARY_OP, ops); + + rtx ops2[] = {d->target, tmp, d->op0, gen_int_mode (nunits - 1, Pmode)}; + icode = code_for_pred_slide (UNSPEC_VSLIDEDOWN, d->vmode); + emit_nonvlmax_insn (icode, BINARY_OP_TUMA, ops2, gen_int_mode (1, Pmode)); +} + else +{ + /* Extract the last element of the first vector. */ + scalar_mode smode = GET_MODE_INNER (d->vmode); + rtx tmp = gen_reg_rtx (smode); + emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode)); + + /* Insert the scalar into element 0. */ + unsigned int unspec + = FLOAT_MODE_P (d->vmode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP; + insn_code icode = code_for_pred_slide (unspec, d->vmode); + rtx ops[] = {d->target, d->op1, tmp}; + emit_vlmax_insn (icode, BINARY_OP, ops); +} - /* Insert the scalar into element 0. */ - unsigned int unspec -= FLOAT_MODE_P (d->vmode)
[gcc r15-6223] genrecog: Split into separate partitions [PR111600].
https://gcc.gnu.org/g:6dcfe8743134936db17ffdfd0a5102a87338f494 commit r15-6223-g6dcfe8743134936db17ffdfd0a5102a87338f494 Author: Robin Dapp Date: Tue Nov 26 14:44:17 2024 +0100 genrecog: Split into separate partitions [PR111600]. Hi, this patch makes genrecog split its output into separate files (10 by default) in the same vein as genemit does. The changes are mostly mechanical again, changing printfs and puts to fprintf. As insn-recog.cc relies on being able to call other recog functions a header insn-recog.h is introduced that pre-declares all of those. For simplicity the number of files is determined by (re-using) --with-insnemit-partitions. Naming suggestions welcome :) Bootstrapped and regtested on x86 and power10, regtested on riscv. aarch64 bootstrap is currently blocked because of the "maybe uninitialized" issue discussed on IRC. Regards Robin PR target/111600 gcc/ChangeLog: * Makefile.in: Add insn-recog split. * configure: Regenerate. * configure.ac: Document that the number of insnemit partitions is used for insn-recog as well. * genconditions.cc (write_one_condition): Use fprintf. * genpreds.cc (write_predicate_expr): Ditto. (write_init_reg_class_start_regs): Ditto. * genrecog.cc (write_header): Add header file to includes. (printf_indent): Use fprintf. (change_state): Ditto. (print_code): Ditto. (print_host_wide_int): Ditto. (print_parameter_value): Ditto. (print_test_rtx): Ditto. (print_nonbool_test): Ditto. (print_label_value): Ditto. (print_test): Ditto. (print_decision): Ditto. (print_state): Ditto. (print_subroutine_call): Ditto. (print_acceptance): Ditto. (print_subroutine_start): Ditto. (print_pattern): Ditto. (print_subroutine): Ditto. (print_subroutine_group): Ditto. (handle_arg): Add -O and -H for output and header file handling. (main): Use callback. * gentarget-def.cc (def_target_insn): Use fprintf. * read-md.cc (md_reader::print_c_condition): Ditto. * read-md.h (class md_reader): Ditto. 
Diff: --- gcc/Makefile.in | 29 ++- gcc/configure| 4 +- gcc/configure.ac | 4 +- gcc/genconditions.cc | 4 +- gcc/genpreds.cc | 4 +- gcc/genrecog.cc | 552 ++- gcc/gentarget-def.cc | 2 +- gcc/read-md.cc | 4 +- gcc/read-md.h| 2 +- 9 files changed, 360 insertions(+), 245 deletions(-) diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 27fbbd4bf19e..493ec6a5cb6e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -239,6 +239,12 @@ INSNEMIT_SEQ_SRC = $(patsubst %, insn-emit-%.cc, $(INSNEMIT_SPLITS_SEQ)) INSNEMIT_SEQ_TMP = $(patsubst %, tmp-emit-%.cc, $(INSNEMIT_SPLITS_SEQ)) INSNEMIT_SEQ_O = $(patsubst %, insn-emit-%.o, $(INSNEMIT_SPLITS_SEQ)) +# Re-use the split number for insn-recog as well. +INSNRECOG_SPLITS_SEQ = $(wordlist 1,$(NUM_INSNEMIT_SPLITS),$(one_to_)) +INSNRECOG_SEQ_SRC = $(patsubst %, insn-recog-%.cc, $(INSNRECOG_SPLITS_SEQ)) +INSNRECOG_SEQ_TMP = $(patsubst %, tmp-recog-%.cc, $(INSNRECOG_SPLITS_SEQ)) +INSNRECOG_SEQ_O = $(patsubst %, insn-recog-%.o, $(INSNRECOG_SPLITS_SEQ)) + # These files are to have specific diagnostics suppressed, or are not to # be subject to -Werror: # flex output may yield harmless "no previous prototype" warnings @@ -1385,7 +1391,7 @@ OBJS = \ insn-output.o \ insn-peep.o \ insn-preds.o \ - insn-recog.o \ + $(INSNRECOG_SEQ_O) \ insn-enums.o \ ggc-page.o \ adjust-alignment.o \ @@ -1909,8 +1915,8 @@ TREECHECKING = @TREECHECKING@ FULL_DRIVER_NAME=$(target_noncanonical)-gcc-$(version)$(exeext) MOSTLYCLEANFILES = insn-flags.h insn-config.h insn-codes.h \ - insn-output.cc insn-recog.cc $(INSNEMIT_SEQ_SRC) \ - insn-extract.cc insn-peep.cc \ + insn-output.cc $(INSNRECOG_SEQ_SRC) insn-recog.h \ + $(INSNEMIT_SEQ_SRC) insn-extract.cc insn-peep.cc \ insn-attr.h insn-attr-common.h insn-attrtab.cc insn-dfatab.cc \ insn-latencytab.cc insn-opinit.cc insn-opinit.h insn-preds.cc insn-constants.h \ tm-preds.h tm-constrs.h checksum-options $(GIMPLE_MATCH_PD_SEQ_SRC) \ @@ -2677,7 +2683,8 @@ $(common_out_object_file): $(common_out_file) # and compile them. 
.PRECIOUS: insn-config.h insn-flags.h insn-codes.h insn-constants.h \ - $(INSNEMIT_SEQ_SRC) insn-recog.cc insn-extract.cc insn-output.cc \ + $(INSNEMIT_SEQ_SRC) insn-recog.h $(INSNRECOG_SEQ_SRC) \ + insn-extract.cc insn-output.cc \ insn-peep.cc insn-attr.h insn-attr-common.h insn-attrtab.cc \ insn-dfatab.cc insn-latencytab.cc insn-preds.cc \ $(GIMPLE_MATCH_PD_
[gcc r15-7110] RISC-V: Unbreak bootstrap.
https://gcc.gnu.org/g:e324619281239bb513840600436b735dfbd32416 commit r15-7110-ge324619281239bb513840600436b735dfbd32416 Author: Robin Dapp Date: Tue Jan 21 18:07:41 2025 +0100 RISC-V: Unbreak bootstrap. This fixes a wrong format specifier and an unused variable which should re-enable bootstrap. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_file_end): Fix format string. (riscv_lshift_subword): Mark MODE as unused. Diff: --- gcc/config/riscv/riscv.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f5e672bb7f50..5a3a05041773 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10375,7 +10375,7 @@ riscv_file_end () fprintf (asm_out_file, "\t.long\t4f - 3f\n"); fprintf (asm_out_file, "3:\n"); /* zicfiss, zicfilp. */ - fprintf (asm_out_file, "\t.long\t%x\n", feature_1_and); + fprintf (asm_out_file, "\t.long\t%lx\n", feature_1_and); fprintf (asm_out_file, "4:\n"); fprintf (asm_out_file, "\t.p2align\t%u\n", p2align); fprintf (asm_out_file, "5:\n"); @@ -11959,7 +11959,7 @@ riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask, /* Leftshift a subword within an SImode register. */ void -riscv_lshift_subword (machine_mode mode, rtx value, rtx shift, +riscv_lshift_subword (machine_mode mode ATTRIBUTE_UNUSED, rtx value, rtx shift, rtx *shifted_value) { rtx value_reg = gen_reg_rtx (SImode);
[gcc r15-7236] RISC-V: testsuite: Fix gather_load_64-12-zvbb.c
https://gcc.gnu.org/g:c0c2304e19be438a64841f6a3c56b134ba02d9a6 commit r15-7236-gc0c2304e19be438a64841f6a3c56b134ba02d9a6 Author: Robin Dapp Date: Wed Jan 22 16:19:49 2025 +0100 RISC-V: testsuite: Fix gather_load_64-12-zvbb.c The test fails with _zvfh because we vectorize more. Just adjust the test expectations. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c: Distinguish between zvfh and !zvfh. Diff: --- .../riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c index de5a5ed7d56a..698f0091390e 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/gather_load_64-12-zvbb.c @@ -106,7 +106,8 @@ TEST_LOOP (_Float16, uint64_t) TEST_LOOP (float, uint64_t) TEST_LOOP (double, uint64_t) -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 80 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 80 "vect" { target { ! riscv_zvfh } } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 88 "vect" { target riscv_zvfh } } } */ /* { dg-final { scan-tree-dump " \.MASK_LEN_GATHER_LOAD" "vect" } } */ /* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "vect" } } */ /* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "vect" } } */
[gcc r15-7235] RISC-V: Disable two-source permutes for now [PR117173].
https://gcc.gnu.org/g:006b4e45f3ab169a47146b31d9721c79098236ac commit r15-7235-g006b4e45f3ab169a47146b31d9721c79098236ac Author: Robin Dapp Date: Thu Oct 17 18:39:16 2024 +0200 RISC-V: Disable two-source permutes for now [PR117173]. After testing on the BPI (4.2% improvement for x264 input 1, 4.4% for input 2) and the discussion in PR117173 I figured it's best to disable the two-source permutes by default for now. The patch adds a parameter "riscv-two-source-permutes" which restores the old behavior. PR target/117173 gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_generic_patterns): Only support single-source permutes by default. * config/riscv/riscv.opt: New param "riscv-two-source-permutes". gcc/testsuite/ChangeLog: * gcc.dg/fold-perm-2.c: Run with two-source permutes. * gcc.dg/pr54346.c: Ditto. Diff: --- gcc/config/riscv/riscv-v.cc| 13 - gcc/config/riscv/riscv.opt | 4 gcc/testsuite/gcc.dg/fold-perm-2.c | 1 + gcc/testsuite/gcc.dg/pr54346.c | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index e1172e9c7d2b..9847439ca779 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -3947,11 +3947,22 @@ shuffle_generic_patterns (struct expand_vec_perm_d *d) if (!get_gather_index_mode (d).exists (&sel_mode)) return false; + rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); + poly_uint64 nunits = GET_MODE_NUNITS (sel_mode); + rtx elt; + + bool is_simple = d->one_vector_p +|| const_vec_duplicate_p (sel, &elt) +|| (nunits.is_constant () + && const_vec_all_in_range_p (sel, 0, nunits - 1)); + + if (!is_simple && !riscv_two_source_permutes) +return false; + /* Success! */ if (d->testing_p) return true; - rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); /* Some FIXED-VLMAX/VLS vector permutation situations call targethook instead of expand vec_perm, we handle it directly. 
*/ expand_vec_perm (d->target, d->op0, d->op1, sel); diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index f51f8fd1cdf4..7515c8ea13dd 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -622,6 +622,10 @@ Enum(vsetvl_strategy) String(optim-no-fusion) Value(VSETVL_OPT_NO_FUSION) Target Undocumented RejectNegative Joined Enum(vsetvl_strategy) Var(vsetvl_strategy) Init(VSETVL_OPT) -param=vsetvl-strategy=Set the optimization level of VSETVL insert pass. +-param=riscv-two-source-permutes +Target Undocumented Uinteger Var(riscv_two_source_permutes) Init(0) +-param=riscv-two-source-permutes Enable permutes with two source vectors. + Enum Name(stringop_strategy) Type(enum stringop_strategy_enum) Valid arguments to -mstringop-strategy=: diff --git a/gcc/testsuite/gcc.dg/fold-perm-2.c b/gcc/testsuite/gcc.dg/fold-perm-2.c index 1a4ab4065de0..9fd809ee2967 100644 --- a/gcc/testsuite/gcc.dg/fold-perm-2.c +++ b/gcc/testsuite/gcc.dg/fold-perm-2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-fre1" } */ +/* { dg-additional-options "--param=riscv-two-source-permutes" { target riscv*-*-* } } */ typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); typedef unsigned int vecu __attribute__ ((vector_size (4 * sizeof (unsigned int)))); diff --git a/gcc/testsuite/gcc.dg/pr54346.c b/gcc/testsuite/gcc.dg/pr54346.c index 5ec0609f1e50..b78e0533ac21 100644 --- a/gcc/testsuite/gcc.dg/pr54346.c +++ b/gcc/testsuite/gcc.dg/pr54346.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-dse1 -Wno-psabi" } */ +/* { dg-additional-options "--param=riscv-two-source-permutes" { target riscv*-*-* } } */ typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
[gcc r15-7237] RISC-V: testsuite: Fix reduc-8.c and reduc-9.c
https://gcc.gnu.org/g:f7dc4fd62ce4d9287988892b1e94bbdd0ca1c8fa commit r15-7237-gf7dc4fd62ce4d9287988892b1e94bbdd0ca1c8fa Author: Robin Dapp Date: Wed Jan 22 18:05:44 2025 +0100 RISC-V: testsuite: Fix reduc-8.c and reduc-9.c In both tests we expect a VEC_SHL_INSERT expression but we now add the initial value at the end. Just remove that scan check. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/reduc/reduc-8.c: Remove VEC_SHL_INSERT check. * gcc.target/riscv/rvv/autovec/reduc/reduc-9.c: Ditto. Diff: --- gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c | 1 - gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c | 1 - 2 files changed, 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c index fe47aa3648dd..518f0c33cc4e 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c @@ -12,5 +12,4 @@ add_loop (int *x, int n, int res) return res; } -/* { dg-final { scan-tree-dump-times "VEC_SHL_INSERT" 1 "optimized" } } */ /* { dg-final { scan-assembler-times {vslide1up\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c index 6630d3027210..a5bb8dcccb81 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c @@ -12,5 +12,4 @@ add_loop (float *x, int n, float res) return res; } -/* { dg-final { scan-tree-dump-times "VEC_SHL_INSERT" 1 "optimized" } } */ /* { dg-final { scan-assembler-times {vfslide1up\.vf\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 1 } } */
[gcc r15-6891] match: Keep conditional in simplification to constant [PR118140].
https://gcc.gnu.org/g:14cb0610559fa33f211e1546260458496fdc5e71 commit r15-6891-g14cb0610559fa33f211e1546260458496fdc5e71 Author: Robin Dapp Date: Fri Dec 27 17:29:25 2024 +0100 match: Keep conditional in simplification to constant [PR118140]. In PR118140 we simplify _ifc__33 = .COND_IOR (_41, d_lsm.7_11, _46, d_lsm.7_11); to 1: Match-and-simplified .COND_IOR (_41, d_lsm.7_11, _46, d_lsm.7_11) to 1 when _46 == 1. This happens by removing the conditional and applying a | 1 = 1. Normally we re-introduce the conditional and its else value if needed but that does not happen here as we're not dealing with a vector type. For correctness's sake, we must not remove the conditional even for non-vector types. This patch re-introduces a COND_EXPR in such cases. For PR118140 this results in a non-vectorized loop. PR middle-end/118140 gcc/ChangeLog: * gimple-match-exports.cc (maybe_resimplify_conditional_op): Add COND_EXPR when we simplified to a scalar gimple value but still have an else value. gcc/testsuite/ChangeLog: * gcc.dg/vect/pr118140.c: New test. * gcc.target/riscv/rvv/autovec/pr118140.c: New test. Diff: --- gcc/gimple-match-exports.cc| 26 +++ gcc/testsuite/gcc.dg/vect/pr118140.c | 27 .../gcc.target/riscv/rvv/autovec/pr118140.c| 29 ++ 3 files changed, 72 insertions(+), 10 deletions(-) diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc index e06a8aaa1712..ccba046a1d4f 100644 --- a/gcc/gimple-match-exports.cc +++ b/gcc/gimple-match-exports.cc @@ -337,23 +337,29 @@ maybe_resimplify_conditional_op (gimple_seq *seq, gimple_match_op *res_op, } /* If the "then" value is a gimple value and the "else" value matters, - create a VEC_COND_EXPR between them, then see if it can be further + create a (VEC_)COND_EXPR between them, then see if it can be further simplified. 
*/ gimple_match_op new_op; if (res_op->cond.else_value - && VECTOR_TYPE_P (res_op->type) && gimple_simplified_result_is_gimple_val (res_op)) { - tree len = res_op->cond.len; - if (!len) - new_op.set_op (VEC_COND_EXPR, res_op->type, - res_op->cond.cond, res_op->ops[0], - res_op->cond.else_value); + if (VECTOR_TYPE_P (res_op->type)) + { + tree len = res_op->cond.len; + if (!len) + new_op.set_op (VEC_COND_EXPR, res_op->type, + res_op->cond.cond, res_op->ops[0], + res_op->cond.else_value); + else + new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type, + res_op->cond.cond, res_op->ops[0], + res_op->cond.else_value, + res_op->cond.len, res_op->cond.bias); + } else - new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type, + new_op.set_op (COND_EXPR, res_op->type, res_op->cond.cond, res_op->ops[0], - res_op->cond.else_value, - res_op->cond.len, res_op->cond.bias); + res_op->cond.else_value); *res_op = new_op; return gimple_resimplify3 (seq, res_op, valueize); } diff --git a/gcc/testsuite/gcc.dg/vect/pr118140.c b/gcc/testsuite/gcc.dg/vect/pr118140.c new file mode 100644 index ..2dab98bfc913 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr118140.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64*-*-* || riscv*-*-* } } } */ +/* { dg-additional-options "-std=gnu99" } */ + +long long a; +_Bool d; +char e; +_Bool f[17]; +_Bool f_3; + +int main() { + for (char g = 3; g < 16; g++) { + d |= ({ +int h = f[g - 1] ? 
2 : 0; +_Bool t; +if (f[g - 1]) + t = f_3; +else + t = 0; +int i = t; +h > i; + }); +e += f[g + 1]; + } + + if (d != 0) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c new file mode 100644 index ..31134de7b3a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-add-options riscv_v } */ +/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */ + +long long a; +_Bool d; +char e; +_Bool f[17]; +_Bool f_3; + +int main() { + for (char g = 3; g < 16; g++) { + d |= ({ +int h = f[g - 1] ? 2 : 0; +_Bool t; +if (f[g - 1]) + t = f_3; +else + t = 0; +int i = t; +h > i; + }); +e += f[g + 1]; + } + + if (d != 0) +__builtin_abort (); +}
[gcc r15-6892] RISC-V: Fix vsetvl compatibility predicate [PR118154].
https://gcc.gnu.org/g:e5e9e50fc6816713d012f1d96ae308a0946d5a14 commit r15-6892-ge5e9e50fc6816713d012f1d96ae308a0946d5a14 Author: Robin Dapp Date: Thu Jan 9 20:45:10 2025 +0100 RISC-V: Fix vsetvl compatibility predicate [PR118154]. In PR118154 we emit strided stores but the first of those does not always have the proper VTYPE. That's because we erroneously delete a necessary vsetvl. In order to determine whether to elide (1) Expr[7]: VALID (insn 116, bb 17) Demand fields: demand_ratio_and_ge_sew demand_avl SEW=8, VLMUL=mf2, RATIO=16, MAX_SEW=64 TAIL_POLICY=agnostic, MASK_POLICY=agnostic AVL=(reg:DI 0 zero) when e.g. (2) Expr[3]: VALID (insn 360, bb 15) Demand fields: demand_sew_lmul demand_avl SEW=64, VLMUL=m1, RATIO=64, MAX_SEW=64 TAIL_POLICY=agnostic, MASK_POLICY=agnostic AVL=(reg:DI 0 zero) VL=(reg:DI 13 a3 [345]) is already available, we use sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p. (1) requires RATIO = SEW/LMUL = 16 and an SEW >= 8. (2) has ratio = 64, though, so we cannot directly elide (1). This patch uses ratio_eq_p instead of next_ratio_valid_for_prev_sew_p. PR target/118154 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (MAX_LMUL): New define. (pre_vsetvl::earliest_fuse_vsetvl_info): Use. (pre_vsetvl::pre_global_vsetvl_info): New predicate with equal ratio. * config/riscv/riscv-vsetvl.def: Use. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr118154-1.c: New test. * gcc.target/riscv/rvv/autovec/pr118154-2.c: New test. 
Diff: --- gcc/config/riscv/riscv-vsetvl.cc | 14 +++--- gcc/config/riscv/riscv-vsetvl.def | 4 +-- .../gcc.target/riscv/rvv/autovec/pr118154-1.c | 23 .../gcc.target/riscv/rvv/autovec/pr118154-2.c | 31 ++ 4 files changed, 67 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index e9de21787dda..a4016beebc0c 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -223,6 +223,8 @@ enum emit_type EMIT_AFTER, }; +static const int MAX_LMUL = 8; + /* dump helper functions */ static const char * vlmul_to_str (vlmul_type vlmul) @@ -1445,14 +1447,13 @@ private: inline bool prev_ratio_valid_for_next_sew_p (const vsetvl_info &prev, const vsetvl_info &next) { -return prev.get_ratio () >= (next.get_sew () / 8); +return prev.get_ratio () >= (next.get_sew () / MAX_LMUL); } inline bool next_ratio_valid_for_prev_sew_p (const vsetvl_info &prev, const vsetvl_info &next) { -return next.get_ratio () >= (prev.get_sew () / 8); +return next.get_ratio () >= (prev.get_sew () / MAX_LMUL); } - inline bool sew_ge_and_ratio_eq_p (const vsetvl_info &prev, const vsetvl_info &next) { @@ -1470,6 +1471,13 @@ private: return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next) && next_ratio_valid_for_prev_sew_p (prev, next); } + inline bool + sew_ge_and_prev_sew_le_next_max_sew_and_ratio_eq_p ( +const vsetvl_info &prev, const vsetvl_info &next) + { +return sew_ge_p (prev, next) && prev_sew_le_next_max_sew_p (prev, next) + && ratio_eq_p (prev, next); + } inline bool sew_le_and_next_sew_le_prev_max_sew_p (const vsetvl_info &prev, const vsetvl_info &next) { diff --git a/gcc/config/riscv/riscv-vsetvl.def b/gcc/config/riscv/riscv-vsetvl.def index 2dfff71d987f..d7a5ada772d0 100644 --- a/gcc/config/riscv/riscv-vsetvl.def +++ b/gcc/config/riscv/riscv-vsetvl.def @@ -53,8 +53,8 @@ DEF_SEW_LMUL_RULE (sew_lmul, ge_sew, sew_lmul, sew_ge_and_prev_sew_le_next_max_sew_p, nop) DEF_SEW_LMUL_RULE ( sew_lmul, 
ratio_and_ge_sew, sew_lmul, - sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p, - sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p, nop) + sew_ge_and_prev_sew_le_next_max_sew_and_ratio_eq_p, + sew_ge_and_prev_sew_le_next_max_sew_and_ratio_eq_p, nop) DEF_SEW_LMUL_RULE (ratio_only, sew_lmul, sew_lmul, ratio_eq_p, always_false, use_next_sew_lmul) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118154-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118154-1.c new file mode 100644 index ..55386568a5f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118154-1.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/*
[gcc r14-11210] match: Keep conditional in simplification to constant [PR118140].
https://gcc.gnu.org/g:9b8488f662b070933d0427df22811bf1eaac661e commit r14-11210-g9b8488f662b070933d0427df22811bf1eaac661e Author: Robin Dapp Date: Fri Dec 27 17:29:25 2024 +0100 match: Keep conditional in simplification to constant [PR118140]. In PR118140 we simplify _ifc__33 = .COND_IOR (_41, d_lsm.7_11, _46, d_lsm.7_11); to 1: Match-and-simplified .COND_IOR (_41, d_lsm.7_11, _46, d_lsm.7_11) to 1 when _46 == 1. This happens by removing the conditional and applying a | 1 = 1. Normally we re-introduce the conditional and its else value if needed but that does not happen here as we're not dealing with a vector type. For correctness's sake, we must not remove the conditional even for non-vector types. This patch re-introduces a COND_EXPR in such cases. For PR118140 this results in a non-vectorized loop. PR middle-end/118140 gcc/ChangeLog: * gimple-match-exports.cc (maybe_resimplify_conditional_op): Add COND_EXPR when we simplified to a scalar gimple value but still have an else value. gcc/testsuite/ChangeLog: * gcc.dg/vect/pr118140.c: New test. * gcc.target/riscv/rvv/autovec/pr118140.c: New test. (cherry picked from commit 14cb0610559fa33f211e1546260458496fdc5e71) Diff: --- gcc/gimple-match-exports.cc| 26 +++ gcc/testsuite/gcc.dg/vect/pr118140.c | 27 .../gcc.target/riscv/rvv/autovec/pr118140.c| 29 ++ 3 files changed, 72 insertions(+), 10 deletions(-) diff --git a/gcc/gimple-match-exports.cc b/gcc/gimple-match-exports.cc index aacf3ff04145..1fe6c0e38833 100644 --- a/gcc/gimple-match-exports.cc +++ b/gcc/gimple-match-exports.cc @@ -323,23 +323,29 @@ maybe_resimplify_conditional_op (gimple_seq *seq, gimple_match_op *res_op, } /* If the "then" value is a gimple value and the "else" value matters, - create a VEC_COND_EXPR between them, then see if it can be further + create a (VEC_)COND_EXPR between them, then see if it can be further simplified. 
*/ gimple_match_op new_op; if (res_op->cond.else_value - && VECTOR_TYPE_P (res_op->type) && gimple_simplified_result_is_gimple_val (res_op)) { - tree len = res_op->cond.len; - if (!len) - new_op.set_op (VEC_COND_EXPR, res_op->type, - res_op->cond.cond, res_op->ops[0], - res_op->cond.else_value); + if (VECTOR_TYPE_P (res_op->type)) + { + tree len = res_op->cond.len; + if (!len) + new_op.set_op (VEC_COND_EXPR, res_op->type, + res_op->cond.cond, res_op->ops[0], + res_op->cond.else_value); + else + new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type, + res_op->cond.cond, res_op->ops[0], + res_op->cond.else_value, + res_op->cond.len, res_op->cond.bias); + } else - new_op.set_op (IFN_VCOND_MASK_LEN, res_op->type, + new_op.set_op (COND_EXPR, res_op->type, res_op->cond.cond, res_op->ops[0], - res_op->cond.else_value, - res_op->cond.len, res_op->cond.bias); + res_op->cond.else_value); *res_op = new_op; return gimple_resimplify3 (seq, res_op, valueize); } diff --git a/gcc/testsuite/gcc.dg/vect/pr118140.c b/gcc/testsuite/gcc.dg/vect/pr118140.c new file mode 100644 index ..2dab98bfc913 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr118140.c @@ -0,0 +1,27 @@ +/* { dg-do run { target { aarch64*-*-* || riscv*-*-* } } } */ +/* { dg-additional-options "-std=gnu99" } */ + +long long a; +_Bool d; +char e; +_Bool f[17]; +_Bool f_3; + +int main() { + for (char g = 3; g < 16; g++) { + d |= ({ +int h = f[g - 1] ? 
2 : 0; +_Bool t; +if (f[g - 1]) + t = f_3; +else + t = 0; +int i = t; +h > i; + }); +e += f[g + 1]; + } + + if (d != 0) +__builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c new file mode 100644 index ..31134de7b3a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118140.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-add-options riscv_v } */ +/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */ + +long long a; +_Bool d; +char e; +_Bool f[17]; +_Bool f_3; + +int main() { + for (char g = 3; g < 16; g++) { + d |= ({ +int h = f[g - 1] ? 2 : 0; +_Bool t; +if (f[g - 1]) + t = f_3; +else + t = 0; +int i = t; +h > i; + }); +e +
[gcc r15-8084] RISC-V: Mask values before initializing bitmask vector [PR119114].
https://gcc.gnu.org/g:b58a0af4e2e28b395a3cb4b7283f16f05c0cf3c8 commit r15-8084-gb58a0af4e2e28b395a3cb4b7283f16f05c0cf3c8 Author: Robin Dapp Date: Tue Mar 11 14:30:48 2025 +0100 RISC-V: Mask values before initializing bitmask vector [PR119114]. In the somewhat convoluted vector code of PR119114 we extract a mask value from a vector mask. After some middle-end simplifications we end up with a value of -2. Its lowest bit is correctly unset representing "false". When initializing a bitmask vector from values we compare the full value/register against zero instead of just the last bit. This causes erroneous mask values. This patch masks the values by & 0x1 before comparing against 0. PR target/119114 gcc/ChangeLog: * config/riscv/autovec.md: Apply & 0x1 mask when initializing bitmask vector. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr119114.c: New test. Diff: --- gcc/config/riscv/autovec.md| 14 +++- .../gcc.target/riscv/rvv/autovec/pr119114.c| 37 ++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 92e6942b5233..c7f12f9e36f5 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -405,16 +405,28 @@ ;; Provide a vec_init for mask registers by initializing ;; a QImode vector and comparing it against 0. +;; As we need to ignore all but the lowest bit apply an AND mask +;; before doing the comparison. (define_expand "vec_initqi" [(match_operand:VB 0 "register_operand") (match_operand 1 "")] "TARGET_VECTOR" { +/* Expand into a QImode vector. */ machine_mode qimode = riscv_vector::get_vector_mode (QImode, GET_MODE_NUNITS (mode)).require (); rtx tmp = gen_reg_rtx (qimode); riscv_vector::expand_vec_init (tmp, operands[1]); -riscv_vector::expand_vec_cmp (operands[0], NE, tmp, CONST0_RTX (qimode)); + +/* & 0x1. 
*/ +insn_code icode = code_for_pred (AND, qimode); +rtx tmp2 = gen_reg_rtx (qimode); +rtx ones = gen_const_vec_duplicate (qimode, GEN_INT (1)); +rtx ops[] = {tmp2, tmp, ones}; +riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops); + +/* Compare against zero. */ +riscv_vector::expand_vec_cmp (operands[0], NE, tmp2, CONST0_RTX (qimode)); DONE; } ) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr119114.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr119114.c new file mode 100644 index ..01025d62f1fd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr119114.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-require-effective-target rv64 } */ +/* { dg-add-options riscv_v } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -std=gnu99 -fwhole-program -mrvv-vector-bits=zvl" } */ + +_Bool a; +short b[18]; +long long al; +_Bool e; +char f = 010; +short t[18]; +unsigned short w[8][18][18][18]; + +void +c (_Bool e, char f, short t[], unsigned short w[][18][18][18]) +{ + for (int ae = 1; ae < f + 5; ae += 2) +{ + a -= (_Bool) (t[ae - 1] & t[ae + 3]); + for (short af = 0; af < 18; af += 2) + for (_Bool ah = 0; ah < (w[e][1][af][0] > 0); ah = 5) + b[af] |= 9; +} +} + +int +main () +{ + for (int ad = 0; ad < 18; ad++) +t[ad] = 3; + + c (e, f, t, w); + al = a; + if (al != 0) +__builtin_abort (); +}
[gcc r15-9329] expr: Use constant_lower_bound classifying constructor els [PR116595].
https://gcc.gnu.org/g:f183ae0ae891a471764876eb1e69239904598bb4 commit r15-9329-gf183ae0ae891a471764876eb1e69239904598bb4 Author: Robin Dapp Date: Thu Apr 3 16:46:05 2025 +0200 expr: Use constant_lower_bound classifying constructor els [PR116595]. In categorize_ctor_elements_1 we do VECTOR_CST_NELTS (value).to_constant () but VALUE's type can be a VLA vector (since r15-5780-g17b520a10cdaab). This patch uses constant_lower_bound instead. PR middle-end/116595 gcc/ChangeLog: * expr.cc (categorize_ctor_elements_1): Use constant_lower_bound. gcc/testsuite/ChangeLog: * g++.target/riscv/rvv/autovec/pr116595.C: New test. Diff: --- gcc/expr.cc | 6 +++--- gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C | 10 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/gcc/expr.cc b/gcc/expr.cc index 2147eedad7be..3815c565e2d8 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -7193,9 +7193,9 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, case VECTOR_CST: { - /* We can only construct constant-length vectors using - CONSTRUCTOR. */ - unsigned int nunits = VECTOR_CST_NELTS (value).to_constant (); + unsigned int nunits + = constant_lower_bound + (TYPE_VECTOR_SUBPARTS (TREE_TYPE (value))); for (unsigned int i = 0; i < nunits; ++i) { tree v = VECTOR_CST_ELT (value, i); diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C new file mode 100644 index ..6d509d2cf74e --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv" } */ + +#include + +void +vsseg (float *a, vfloat32mf2_t b, vfloat32mf2_t c, unsigned long vl) +{ + vfloat32mf2x2_t foo = vfloat32mf2x2_t (); +}
[gcc r15-9330] testsuite: Add -mabi to pr116595.C
https://gcc.gnu.org/g:ac1044da4b3db6cba7aa5d9faa1f0622b10ff823 commit r15-9330-gac1044da4b3db6cba7aa5d9faa1f0622b10ff823 Author: Robin Dapp Date: Wed Apr 9 12:11:52 2025 +0200 testsuite: Add -mabi to pr116595.C As usual, I forgot to add -mabi=lp64d to the test case. This patch adds it. Going to push as obvious. gcc/testsuite/ChangeLog: * g++.target/riscv/rvv/autovec/pr116595.C: Add -mabi. Diff: --- gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C index 6d509d2cf74e..37475493a214 100644 --- a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C +++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr116595.C @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d" } */ #include
[gcc r15-9491] RISC-V: Do not lift up vsetvl into non-transparent blocks [PR119547].
https://gcc.gnu.org/g:517f7e3f02b4c945d2b4bdabb490961cf986391e commit r15-9491-g517f7e3f02b4c945d2b4bdabb490961cf986391e Author: Robin Dapp Date: Fri Apr 4 17:06:44 2025 +0200 RISC-V: Do not lift up vsetvl into non-transparent blocks [PR119547]. When lifting up a vsetvl into a block we currently don't consider the block's transparency with respect to the vsetvl as in other parts of the pass. This patch does not perform the lift when transparency is not guaranteed. This condition is more restrictive than necessary as we can still perform a vsetvl lift if the conflicting register is only ever used in vsetvls and no regular insns but given how late we are in the GCC 15 cycle it seems better to defer this. Therefore gcc.target/riscv/rvv/vsetvl/avl_single-68.c is XFAILed for now. This issue was found in OpenCV where it manifests as a runtime error. Zhijin Zeng debugged PR119547 and provided an initial patch. Reported-By: 曾治金 PR target/119547 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pre_vsetvl::earliest_fuse_vsetvl_info): Do not perform lift if block is not transparent. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/avl_single-68.c: xfail. * g++.target/riscv/rvv/autovec/pr119547.C: New test. * g++.target/riscv/rvv/autovec/pr119547-2.C: New test. * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c: Adjust. 
Diff: --- gcc/config/riscv/riscv-vsetvl.cc | 12 ++ .../g++.target/riscv/rvv/autovec/pr119547-2.C | 212 + .../g++.target/riscv/rvv/autovec/pr119547.C| 82 .../gcc.target/riscv/rvv/vsetvl/avl_single-68.c| 8 +- .../riscv/rvv/vsetvl/vlmax_switch_vtype-10.c | 4 +- 5 files changed, 315 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 0ac2538f596f..c4046bcc3455 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -3022,6 +3022,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter) continue; } + /* We cannot lift a vsetvl into the source block if the block is +not transparent WRT to it. +This is too restrictive for blocks where a register's use only +feeds into vsetvls and no regular insns. One example is the +test rvv/vsetvl/avl_single-68.c which is currently XFAILed for +that reason. +In order to support this case we'd need to check the vsetvl's +AVL operand's uses in the source block and make sure they are +only used in other vsetvls. 
*/ + if (!bitmap_bit_p (m_transp[eg->src->index], expr_index)) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C new file mode 100644 index ..1b98d3d0c32b --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C @@ -0,0 +1,212 @@ +/* { dg-do run { target rv64 } } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param=logical-op-non-short-circuit=0" } */ + +#include + +using v_uint8 = vuint8m2_t; +using v_int8 = vint8m2_t; +using v_uint16 = vuint16m2_t; +using v_int16 = vint16m2_t; +using v_uint32 = vuint32m2_t; +using v_int32 = vint32m2_t; +using v_uint64 = vuint64m2_t; +using v_int64 = vint64m2_t; +using v_float32 = vfloat32m2_t; +using v_float64 = vfloat64m2_t; + +using uchar = unsigned char; +using schar = signed char; +using ushort = unsigned short; +using uint = unsigned int; +using uint64 = unsigned long int; +using int64 = long int; + +struct Size +{ + int width; + int height; +}; + +template struct VTraits; + +template <> struct VTraits +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m1 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m2 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m4 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m8 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; + +template <> struct VTraits +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m1 (); } + using 
lane_type = double; + static const int max_nlanes = 102
[gcc r14-11747] vect: Use original LHS type for gather pattern [PR118950].
https://gcc.gnu.org/g:bed0053a28e238cb9c079a3ec89341541390f933 commit r14-11747-gbed0053a28e238cb9c079a3ec89341541390f933 Author: Robin Dapp Date: Fri Feb 21 07:19:40 2025 +0100 vect: Use original LHS type for gather pattern [PR118950]. In PR118950 we do not zero masked elements in a gather load. While recognizing a gather/scatter pattern we do not use the original type of the LHS. This matters because the type can differ with bool patterns (e.g. _Bool vs unsigned char) and we don't notice the need for zeroing out the padding bytes. This patch just uses the original LHS's type. PR middle-end/118950 gcc/ChangeLog: * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Use original LHS's type. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr118950.c: New test. (cherry picked from commit f3d4208e798afafcba5246334004e9646e390681) Diff: --- .../gcc.target/riscv/rvv/autovec/pr118950.c| 29 ++ gcc/tree-vect-patterns.cc | 3 ++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c new file mode 100644 index ..604d4264eac1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-add-options riscv_v } */ +/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */ + +unsigned char a; +long long r; +_Bool h = 1; +short j[23]; +_Bool k[3][23]; + +void b(_Bool h, short j[], _Bool k[][23]) { + for (int m = 0; m < 23; m += 3) +for (short n = 0; n < 22; n += 4) + a = ({ +unsigned char o = a; +unsigned char p = j[n] ? h : k[m][n]; +o > p ? 
o : p; + }); +} + +int main() { + for (int m = 0; m < 23; ++m) +j[m] = 10; + b(h, j, k); + r = a; + if (r != 1) +__builtin_abort (); +} diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 87c2acff386d..ed6cac77d2a1 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -6264,7 +6264,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, else pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, offset, scale, zero); - tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL); + tree lhs = gimple_get_lhs (stmt_info->stmt); + tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); gimple_call_set_lhs (pattern_stmt, load_lhs); } else
[gcc r15-7608] RISC-V: Fix ratio in vsetvl fuse rule [PR115703].
https://gcc.gnu.org/g:44d4a1086d965fb5280daf65c7c4a253ad6cc8a1 commit r15-7608-g44d4a1086d965fb5280daf65c7c4a253ad6cc8a1 Author: Robin Dapp Date: Thu Feb 6 14:43:17 2025 +0100 RISC-V: Fix ratio in vsetvl fuse rule [PR115703]. In PR115703 we fuse two vsetvls: Fuse curr info since prev info compatible with it: prev_info: VALID (insn 438, bb 2) Demand fields: demand_ge_sew demand_non_zero_avl SEW=32, VLMUL=m1, RATIO=32, MAX_SEW=64 TAIL_POLICY=agnostic, MASK_POLICY=agnostic AVL=(reg:DI 0 zero) VL=(reg:DI 9 s1 [312]) curr_info: VALID (insn 92, bb 20) Demand fields: demand_ratio_and_ge_sew demand_avl SEW=64, VLMUL=m1, RATIO=64, MAX_SEW=64 TAIL_POLICY=agnostic, MASK_POLICY=agnostic AVL=(const_int 4 [0x4]) VL=(nil) prev_info after fused: VALID (insn 438, bb 2) Demand fields: demand_ratio_and_ge_sew demand_avl SEW=64, VLMUL=mf2, RATIO=64, MAX_SEW=64 TAIL_POLICY=agnostic, MASK_POLICY=agnostic AVL=(const_int 4 [0x4]) VL=(nil). The result is vsetvl zero, zero, e64, mf2, ta, ma. The previous vsetvl set vl = 4 but here we wrongly set it to vl = 2. As all the following vsetvls only ever change the ratio we never recover. The issue is quite difficult to trigger because we can often deduce the value of d at runtime. Then every check for the value of d will be optimized away. The last known bad commit is r15-3458-g5326306e7d9d36. With that commit the output is wrong but -fno-schedule-insns makes it correct. From the next commit on the issue is latent. I still added the PR's test as scan and run check even if they don't trigger right now. Not sure if the run test will ever fail but well. I verified that the patch fixes the issue when applied on top of r15-3458-g5326306e7d9d36. PR target/115703 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc: Use max_sew for calculating the new LMUL. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr115703-run.c: New test. * gcc.target/riscv/rvv/autovec/pr115703.c: New test. 
Diff: --- gcc/config/riscv/riscv-vsetvl.cc | 3 +- .../gcc.target/riscv/rvv/autovec/pr115703-run.c| 44 ++ .../gcc.target/riscv/rvv/autovec/pr115703.c| 38 +++ 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 72c4c59514e5..82284624a242 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1756,7 +1756,8 @@ private: inline void use_max_sew_and_lmul_with_next_ratio (vsetvl_info &prev, const vsetvl_info &next) { -prev.set_vlmul (calculate_vlmul (prev.get_sew (), next.get_ratio ())); +int max_sew = MAX (prev.get_sew (), next.get_sew ()); +prev.set_vlmul (calculate_vlmul (max_sew, next.get_ratio ())); use_max_sew (prev, next); prev.set_ratio (next.get_ratio ()); } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c new file mode 100644 index ..0c2c3d7d4fcc --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703-run.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-require-effective-target rvv_zvl256b_ok } */ +/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -fwhole-program -fwrapv" } */ + +int a, i; +unsigned long b; +unsigned c, f; +long long d = 1; +short e, m; +long g, h; + +__attribute__ ((noipa)) +void check (unsigned long long x) +{ + if (x != 13667643351234938049ull) +__builtin_abort (); +} + +int main() { + for (int q = 0; q < 2; q += 1) { +for (short r = 0; r < 2; r += 1) + for (char s = 0; s < 6; s++) +for (short t = 0; t < 011; t += 12081 - 12080) + for (short u = 0; u < 11; u++) { +a = ({ a > 1 ? a : 1; }); +b = ({ b > 5 ? b : 5; }); +for (short j = 0; j < 2; j = 2080) + c = ({ c > 030 ? c : 030; }); +for (short k = 0; k < 2; k += 2080) + d *= 7; +e *= 10807; +f = ({ f > 3 ? f : 3; }); + } +for (int l = 0; l < 21; l += 1) + for (int n = 0; n < 16; n++) { +g = ({ m ? g : m; }); +for (char o = 0; o < 7; o += 1) + h *= 3; +i = ({ i < 0 ? 
i : 0; }); + } + } + + check (d); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c new file mode 100644 index ..207ff3c86ec3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr115703.c @@ -0,0 +1,38 @@ +/* { dg-do
[gcc r15-8021] RISC-V: Adjust LMUL when using maximum SEW [PR117955].
https://gcc.gnu.org/g:f043ef2b6a59088b16a269b55f09023f76c92e32 commit r15-8021-gf043ef2b6a59088b16a269b55f09023f76c92e32 Author: Robin Dapp Date: Tue Feb 25 12:55:08 2025 +0100 RISC-V: Adjust LMUL when using maximum SEW [PR117955]. When merging two vsetvls that both only demand "SEW >= ..." we use their maximum SEW and keep the LMUL. That may lead to invalid vector configurations like e64, mf4. As we make sure that the SEW requirements overlap we can use the SEW and LMUL of the configuration with the larger SEW. Ma Jin already touched this merge rule some weeks ago and fixed the ratio calculation (r15-6873). Calculating the ratio from an invalid SEW/LMUL combination led to an overflow in the ratio variable, though. I'd argue the proper fix is to update SEW and LMUL, keeping the ratio as before. This "breaks" bug-10.c but its check only checked for a workaround anyway so I turned it into a run test. Ma Jin helped minify the PR's test and provided a larger test case for bug-10. PR target/117955 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc: Use LMUL/ratio from vsetvl with larger SEW. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/bug-10.c: Convert to run test. * gcc.target/riscv/rvv/base/bug-10-2.c: New test. * gcc.target/riscv/rvv/base/pr117955.c: New test. 
Diff: --- gcc/config/riscv/riscv-vsetvl.cc | 8 +- gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c | 93 ++ gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c | 33 +++- gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c | 26 ++ 4 files changed, 154 insertions(+), 6 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 82284624a242..f0165f7b8c8c 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1729,9 +1729,11 @@ private: } inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next) { -int max_sew = MAX (prev.get_sew (), next.get_sew ()); -prev.set_sew (max_sew); -prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ())); +bool prev_sew_larger = prev.get_sew () >= next.get_sew (); +const vsetvl_info from = prev_sew_larger ? prev : next; +prev.set_sew (from.get_sew ()); +prev.set_vlmul (from.get_vlmul ()); +prev.set_ratio (from.get_ratio ()); use_min_of_max_sew (prev, next); } inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c new file mode 100644 index ..fe3a1efb8d86 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10-2.c @@ -0,0 +1,93 @@ +/* { dg-do run { target { rv64 } } } */ +/* { dg-require-effective-target rv64 } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-require-effective-target riscv_zvfh_ok } */ +/* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2" } */ + +#include + +int8_t a[1]; +uint16_t b[1]; +float c[1], n[1]; +uint16_t d[1]; +uint8_t e[1]; +uint16_t f[1]; +_Float16 g[1], k[1], m[1], p[1]; +uint16_t i[1]; +int8_t j[1]; +uint8_t o[1]; +uint32_t l[1]; +uint16_t q[1]; +uint32_t r[1]; +uint32_t s[1]; +int16_t t[1]; +int main() +{ + int u = 25; + int8_t *v = a; + uint32_t *w; + uint16_t *aa = b; + float *ab = c, *as = n; + uint32_t *ad; + uint16_t *ah = f; + _Float16 *ai = g, *aj 
= k, *an = m, *au = p; + int32_t *ak; + int16_t *al; + uint16_t *am = i; + int8_t *ao = j; + uint8_t *ap = o; + uint32_t *aq = l; + uint16_t *ar = q; + uint32_t *at = r; + uint32_t *av = s; + int32_t *ax; + int16_t *ay = t; + for (size_t az; u; u -= az) + { +az = __riscv_vsetvl_e32m8(u); +vint8m2_t ba = __riscv_vle8_v_i8m2(v, az); +vbool4_t bb = __riscv_vmseq_vx_i8m2_b4(ba, 1, az); +vuint16m4_t bc = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 2, az); +vuint32m8_t bd = __riscv_vsll_vx_u32m8(__riscv_vid_v_u32m8(az), 1, az); +vuint32m8_t be = __riscv_vluxei16_v_u32m8_m(bb, w, bc, az); +vuint16m4_t bf; +__riscv_vsuxei16_v_u32m8_m(bb, aq, bf, be, az); +vuint8m2_t bg = __riscv_vsll_vx_u8m2(__riscv_vid_v_u8m2(az), 1, az); +vuint16m4_t bh = __riscv_vloxei8_v_u16m4(aa, bg, az); +vfloat16m4_t bi; +vuint16m4_t bj = __riscv_vsll_vx_u16m4(__riscv_vid_v_u16m4(az), 1, az); +vint16m4_t bk = __riscv_vloxei32_v_i16m4_m(bb, al, bd, az); +__riscv_vsse16_v_u16m4(ar, 2, bh, az); +vuint16m4_t bl = __riscv_vloxei16_v_u16m4(d, bj, az); +vfloat16m4_t bm = __riscv_vle16_v_f16m4(ai, az); +vuint16m4_t bn = __riscv_vlse16_v_u16m4(ah, 2, az); +vint32m8_t bo = __riscv_vle32_v_i32m8_m(bb, ak, az); +vfloat16m1_t bp = __riscv_vle16_v_f16m1(aj, az); +vuint16m4_t bq = __riscv_vrgatherei16_vv_u16m4(bl, bn, az); +
[gcc r15-8022] RISC-V: Do not delete fused vsetvl if it has uses [PR119115].
https://gcc.gnu.org/g:77ef91d7159613c0cfc2920ddd5a32952c61ff5b commit r15-8022-g77ef91d7159613c0cfc2920ddd5a32952c61ff5b Author: Robin Dapp Date: Wed Mar 5 18:16:57 2025 +0100 RISC-V: Do not delete fused vsetvl if it has uses [PR119115]. In PR119115 we end up with an orphaned vsetvli zero,t1,e16,m1,ta,ma. t1 originally came from another vsetvl that was fused from vsetvli a4,a3,e8,mf2,ta,ma vsetvli t1,a3,e8,mf2,ta,ma (1) to vsetvli zero,a3,e16,m1,ta,ma. This patch checks if t1, the VL operand of (1), has AVL uses and does not delete the vsetvl if so. While doing so, it also wraps the search for VL uses into two new functions reg_used and reg_single_use_in_avl. PR target/119115 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (reg_used): New function. (reg_single_use_in_avl): Ditto. (pre_vsetvl::fuse_local_vsetvl_info): Use reg_single_use_in_avl when checking if vsetvl can be deleted. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr119115.c: New test. Diff: --- gcc/config/riscv/riscv-vsetvl.cc | 95 -- gcc/testsuite/gcc.target/riscv/rvv/base/pr119115.c | 59 ++ 2 files changed, 131 insertions(+), 23 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index f0165f7b8c8c..0ac2538f596f 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -780,6 +780,36 @@ enum class avl_demand_type : unsigned ignore_avl = demand_flags::DEMAND_EMPTY_P, }; +/* Go through all uses of INSN looking for a single use of register REG. + Return true if we find +- Uses in a non-RVV insn +- More than one use in an RVV insn +- A single use in the VL operand of an RVV insn + and false otherwise. + A single use in the AVL operand does not count as use as we take care of + those separately in the pass. 
*/ + +static bool +reg_used (insn_info *insn, rtx reg) +{ + unsigned int regno = REGNO (reg); + const hash_set vl_uses = get_all_real_uses (insn, regno); + for (use_info *use : vl_uses) +{ + gcc_assert (use->insn ()->is_real ()); + rtx_insn *rinsn = use->insn ()->rtl (); + if (!has_vl_op (rinsn) + || count_regno_occurrences (rinsn, regno) != 1) + return true; + + rtx avl = ::get_avl (rinsn); + if (!avl || !REG_P (avl) || regno != REGNO (avl)) + return true; +} + return false; +} + + class vsetvl_info { private: @@ -1142,27 +1172,7 @@ public: /* Determine if dest operand(vl) has been used by non-RVV instructions. */ if (dest_vl) - { - const hash_set vl_uses - = get_all_real_uses (get_insn (), REGNO (dest_vl)); - for (use_info *use : vl_uses) - { - gcc_assert (use->insn ()->is_real ()); - rtx_insn *rinsn = use->insn ()->rtl (); - if (!has_vl_op (rinsn) - || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1) - { - m_vl_used_by_non_rvv_insn = true; - break; - } - rtx avl = ::get_avl (rinsn); - if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl)) - { - m_vl_used_by_non_rvv_insn = true; - break; - } - } - } + m_vl_used_by_non_rvv_insn = reg_used (get_insn (), dest_vl); /* Collect the read vl insn for the fault-only-first rvv loads. */ if (fault_first_load_p (insn->rtl ())) @@ -1369,6 +1379,35 @@ public: void set_empty_info () { global_info.set_empty (); } }; +/* Same as REG_USED () but looks for a single use in an RVV insn's AVL + operand. 
*/ +static bool +reg_single_use_in_avl (insn_info *insn, rtx reg) +{ + if (!reg) +return false; + unsigned int regno = REGNO (reg); + const hash_set vl_uses = get_all_real_uses (insn, regno); + for (use_info *use : vl_uses) +{ + gcc_assert (use->insn ()->is_real ()); + rtx_insn *rinsn = use->insn ()->rtl (); + if (!has_vl_op (rinsn) + || count_regno_occurrences (rinsn, regno) != 1) + return false; + + vsetvl_info info = vsetvl_info (use->insn ()); + + if (!info.has_nonvlmax_reg_avl ()) + return false; + + rtx avl = info.get_avl (); + if (avl && REG_P (avl) && regno == REGNO (avl)) + return true; +} + return false; +} + /* Demand system is the RVV-based VSETVL info analysis tools wrapper. It defines compatible rules for SEW/LMUL, POLICY and AVL. Also, it provides 3 interfaces available_p, compatible_p and @@ -2797,8 +2836,18 @@ pre_vsetvl::fuse_local_vsetvl_info () 64 into 32. */ prev_info.set_max_sew ( MIN (prev_info.get_max_sew (), curr_info.get_max_sew ())); - if (!curr_info.vl_use
[gcc r15-7687] vect: Use original LHS type for gather pattern [PR118950].
https://gcc.gnu.org/g:f3d4208e798afafcba5246334004e9646e390681 commit r15-7687-gf3d4208e798afafcba5246334004e9646e390681 Author: Robin Dapp Date: Fri Feb 21 07:19:40 2025 +0100 vect: Use original LHS type for gather pattern [PR118950]. In PR118950 we do not zero masked elements in a gather load. While recognizing a gather/scatter pattern we do not use the original type of the LHS. This matters because the type can differ with bool patterns (e.g. _Bool vs unsigned char) and we don't notice the need for zeroing out the padding bytes. This patch just uses the original LHS's type. PR middle-end/118950 gcc/ChangeLog: * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Use original LHS's type. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr118950.c: New test. Diff: --- .../gcc.target/riscv/rvv/autovec/pr118950.c| 29 ++ gcc/tree-vect-patterns.cc | 3 ++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c new file mode 100644 index ..604d4264eac1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118950.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-add-options riscv_v } */ +/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */ + +unsigned char a; +long long r; +_Bool h = 1; +short j[23]; +_Bool k[3][23]; + +void b(_Bool h, short j[], _Bool k[][23]) { + for (int m = 0; m < 23; m += 3) +for (short n = 0; n < 22; n += 4) + a = ({ +unsigned char o = a; +unsigned char p = j[n] ? h : k[m][n]; +o > p ? 
o : p; + }); +} + +int main() { + for (int m = 0; m < 23; ++m) +j[m] = 10; + b(h, j, k); + r = a; + if (r != 1) +__builtin_abort (); +} diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 6fc97d1b6ef9..4f0a7ea162b9 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -6022,7 +6022,8 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, else pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, offset, scale, zero); - tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL); + tree lhs = gimple_get_lhs (stmt_info->stmt); + tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); gimple_call_set_lhs (pattern_stmt, load_lhs); } else
[gcc r15-7688] RISC-V: Include pattern stmts for dynamic LMUL computation [PR114516].
https://gcc.gnu.org/g:6be1b9e94d9a2ead15e3625e833f1e34503ab803 commit r15-7688-g6be1b9e94d9a2ead15e3625e833f1e34503ab803 Author: Robin Dapp Date: Fri Feb 21 17:08:16 2025 +0100 RISC-V: Include pattern stmts for dynamic LMUL computation [PR114516]. When scanning for program points, i.e. vector statements, we're missing pattern statements. In PR114516 this becomes obvious as we choose LMUL=8 assuming there are only three statements but the divmod pattern adds another three. Those push us beyond four registers so we need to switch to LMUL=4. This patch adds pattern statements to the program points which helps calculate a better register pressure estimate. PR target/114516 gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (compute_estimated_lmul): Add pattern statements to program points. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr114516.c: New test. Diff: --- gcc/config/riscv/riscv-vector-costs.cc | 29 ++ .../gcc.dg/vect/costmodel/riscv/rvv/pr114516.c | 29 ++ 2 files changed, 58 insertions(+) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index d4571b65e193..167375ca7516 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -217,6 +217,35 @@ compute_local_program_points ( "program point %d: %G", info.point, gsi_stmt (si)); } + + /* If the statement is part of a pattern, also add the other +pattern statements. 
*/ + gimple_seq pattern_def_seq; + if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) + { + gimple_stmt_iterator si2; + + for (si2 = gsi_start (pattern_def_seq); + !gsi_end_p (si2); + gsi_next (&si2)) + { + stmt_vec_info pattern_def_stmt_info + = vinfo->lookup_stmt (gsi_stmt (si2)); + if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info) + || STMT_VINFO_LIVE_P (pattern_def_stmt_info)) + { + stmt_point info = {point, gsi_stmt (si2), + pattern_def_stmt_info}; + program_points.safe_push (info); + point++; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, +"program point %d: %G", +info.point, gsi_stmt (si2)); + } + } + } } program_points_per_bb.put (bb, program_points); } diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114516.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114516.c new file mode 100644 index ..55d036c3ad7b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114516.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zba_zbb -mabi=lp64d -mrvv-max-lmul=dynamic -O3 -fdump-tree-vect-details" } */ + +typedef float real_t; +__attribute__((aligned(64))) real_t a[32000]; +real_t s315() +{ +for (int i = 0; i < 32000; i++) +a[i] = (i * 7) % 32000; +real_t x, chksum; +int index; +for (int nl = 0; nl < 256; nl++) { +x = a[0]; +index = 0; +for (int i = 0; i < 32000; ++i) { +if (a[i] > x) { +x = a[i]; +index = i; +} +} +chksum = x + (real_t) index; +} +return index + x + 1; +} + +/* { dg-final { scan-assembler {e32,m4} } } */ +/* { dg-final { scan-assembler-not {e32,m8} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
[gcc r15-9161] RISC-V: Fix vec_duplicate[bimode] expander [PR119572].
https://gcc.gnu.org/g:716d39f0a248c1003033e6a312c736180790ef70 commit r15-9161-g716d39f0a248c1003033e6a312c736180790ef70 Author: Robin Dapp Date: Tue Apr 1 21:17:54 2025 +0200 RISC-V: Fix vec_duplicate[bimode] expander [PR119572]. Since r15-9062-g70391e3958db79 we perform vector bitmask initialization via the vec_duplicate expander directly. This triggered a latent bug in ours where we failed to mask out the single bit, which resulted in an execution FAIL of pr119114.c. The attached patch adds the 1-masking of the broadcast operand. PR target/119572 gcc/ChangeLog: * config/riscv/autovec.md: Mask broadcast value. Diff: --- gcc/config/riscv/autovec.md | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index f53ed3a5e3fd..9e51e3ce6a30 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -330,7 +330,15 @@ { poly_int64 nunits = GET_MODE_NUNITS (mode); machine_mode mode = riscv_vector::get_vector_mode (QImode, nunits).require (); -rtx dup = expand_vector_broadcast (mode, operands[1]); + +/* The 1-bit mask is in a QImode register, make sure we only use the last + bit. See also PR119114 and the respective vec_init expander. */ +rtx tmp = gen_reg_rtx (Xmode); +emit_insn + (gen_rtx_SET (tmp, gen_rtx_AND (Xmode, gen_lowpart (Xmode, operands[1]), + CONST1_RTX (Xmode; + +rtx dup = expand_vector_broadcast (mode, gen_lowpart (QImode, tmp)); riscv_vector::expand_vec_cmp (operands[0], NE, dup, CONST0_RTX (mode)); DONE; }
[gcc r16-845] RISC-V: Add autovec mode param.
https://gcc.gnu.org/g:b949d048e914a4cd11a63004a9a2d42e51bc3ac8 commit r16-845-gb949d048e914a4cd11a63004a9a2d42e51bc3ac8 Author: Robin Dapp Date: Wed May 7 21:02:21 2025 +0200 RISC-V: Add autovec mode param. This patch adds a --param=autovec-mode=MODE option. When the param is specified we make autovectorize_vector_modes return exactly this mode if it is available. This helps when testing different vectorizer settings. gcc/ChangeLog: * config/riscv/riscv-v.cc (autovectorize_vector_modes): Return user-specified mode if available. * config/riscv/riscv.opt: New param. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/param-autovec-mode.c: New test. Diff: --- gcc/config/riscv/riscv-v.cc| 22 ++ gcc/config/riscv/riscv.opt | 4 .../riscv/rvv/autovec/param-autovec-mode.c | 16 3 files changed, 42 insertions(+) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index e406e7a7f590..be6147b80a2c 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2821,6 +2821,28 @@ autovectorize_vector_modes (vector_modes *modes, bool) i++; size = base_size / (1U << i); } + + /* If the user specified the exact mode to use look if it is available and + remove all other ones before returning. */ + if (riscv_autovec_mode) +{ + auto_vector_modes ms; + ms.safe_splice (*modes); + modes->truncate (0); + + for (machine_mode mode : ms) + { + if (!strcmp (GET_MODE_NAME (mode), riscv_autovec_mode)) + { + modes->safe_push (mode); + return 0; + } + } + + /* Nothing found, fall back to regular handling. */ + modes->safe_splice (ms); +} + /* Enable LOOP_VINFO comparison in COST model. 
*/ return VECT_COMPARE_COSTS; } diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 527e09549a8a..b2b9d3311f4e 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -286,6 +286,10 @@ Max number of bytes to compare as part of inlined strcmp/strncmp routines (defau Target RejectNegative Joined UInteger Var(gpr2vr_cost) Init(GPR2VR_COST_UNPROVIDED) Set the cost value of the rvv instruction when operate from GPR to VR. +-param=riscv-autovec-mode= +Target Undocumented RejectNegative Joined Var(riscv_autovec_mode) Save +Set the only autovec mode to try. + Enum Name(rvv_max_lmul) Type(enum rvv_max_lmul_enum) The RVV possible LMUL (-mrvv-max-lmul=): diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/param-autovec-mode.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/param-autovec-mode.c new file mode 100644 index ..b2ec8f9dc774 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/param-autovec-mode.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param=autovec-mode=V4QI -fdump-tree-vect-details" } */ + +/* By default we will use RVVM1SI mode for vectorization because N is not + known. Check that we use V4QI and create an epilogue when the autovec-mode + param is specified. */ + +void +foo (int *a, int *b, int n) +{ + for (int i = 0; i < n; i++) +a[i] = b[i] + 1; +} + +/* { dg-final { scan-tree-dump "Choosing vector mode V4QI" "vect" } } */ +/* { dg-final { scan-tree-dump "Choosing epilogue vector mode RVVM1SI" "vect" } } */
[gcc r16-846] RISC-V: Support CPUs in -march.
https://gcc.gnu.org/g:4a182418c89666e7594bcb0e5edc5194aa147910 commit r16-846-g4a182418c89666e7594bcb0e5edc5194aa147910 Author: Robin Dapp Date: Thu May 8 09:51:45 2025 +0200 RISC-V: Support CPUs in -march. This patch allows an -march string like -march=sifive-p670 in order to override a previous -march in a simple way. Suppose we have a Makefile that specifies -march=rv64gc by default. A user-specified -mcpu=sifive-p670 would be after the -march in the options string and thus only set -mtune=sifive-p670 (as -mcpu does not override a previously specified -march or -mtune). So if we wanted to override we would need to specify the full, lengthy -march=rv64gcv_... string instead of a simple -mcpu=... Therefore this patch always first tries to interpret -march= as a CPU string. If it is a supported CPU we use its march properties and let it override previously specified options. Otherwise the behavior is as before. This enables the "last-specified option wins" behavior GCC normally employs. Note that -march does not imply -mtune like on x86 or other targets. So an -march=CPU won't override a previously specified -mtune=other-CPU. gcc/ChangeLog: * common/config/riscv/riscv-common.cc (riscv_subset_list::parse_base_ext): Adjust error message. (riscv_handle_option): Parse as CPU string first. (riscv_expand_arch): Ditto. * doc/invoke.texi: Document. gcc/testsuite/ChangeLog: * gcc.target/riscv/arch-56.c: New test. 
Diff: --- gcc/common/config/riscv/riscv-common.cc | 19 --- gcc/doc/invoke.texi | 2 +- gcc/testsuite/gcc.target/riscv/arch-56.c | 13 + 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index c843393998cb..a6d8763f032b 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -980,8 +980,9 @@ riscv_subset_list::parse_base_ext (const char *p) } else { - error_at (m_loc, "%<-march=%s%>: ISA string must begin with rv32, rv64 " - "or Profiles", m_arch); + error_at (m_loc, "%<-march=%s%>: ISA string must begin with rv32, rv64," + " a supported RVA profile or refer to a supported CPU", + m_arch); return NULL; } @@ -1708,7 +1709,8 @@ riscv_handle_option (struct gcc_options *opts, switch (decoded->opt_index) { case OPT_march_: - riscv_parse_arch_string (decoded->arg, opts, loc); + if (riscv_find_cpu (decoded->arg) == NULL) + riscv_parse_arch_string (decoded->arg, opts, loc); return true; case OPT_mcpu_: @@ -1725,15 +1727,18 @@ riscv_handle_option (struct gcc_options *opts, /* Expand arch string with implied extensions. */ const char * -riscv_expand_arch (int argc ATTRIBUTE_UNUSED, +riscv_expand_arch (int argc, const char **argv) { gcc_assert (argc == 1); location_t loc = UNKNOWN_LOCATION; - riscv_parse_arch_string (argv[0], NULL, loc); + /* Try to interpret the arch as CPU first. */ + const char *arch_str = riscv_expand_arch_from_cpu (argc, argv); + if (!strlen (arch_str)) +riscv_parse_arch_string (argv[0], NULL, loc); const std::string arch = riscv_arch_str (false); - if (arch.length()) -return xasprintf ("-march=%s", arch.c_str()); + if (arch.length ()) +return xasprintf ("-march=%s", arch.c_str ()); else return ""; } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 124db1232845..fe47ce564873 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1268,7 +1268,7 @@ See RS/6000 and PowerPC Options. 
-mfence-tso -mno-fence-tso -mdiv -mno-div -misa-spec=@var{ISA-spec-string} --march=@var{ISA-string|Profiles|Profiles_ISA-string} +-march=@var{ISA-string|Profiles|Profiles_ISA-string|CPU/processor string} -mtune=@var{processor-string} -mpreferred-stack-boundary=@var{num} -msmall-data-limit=@var{N-bytes} diff --git a/gcc/testsuite/gcc.target/riscv/arch-56.c b/gcc/testsuite/gcc.target/riscv/arch-56.c new file mode 100644 index ..e075f9661eef --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-56.c @@ -0,0 +1,13 @@ +/* Check whether the second -march overrides the first. */ +/* { dg-do compile { target rv64 } } */ +/* { dg-options "-O3 -march=rv64gc -march=sifive-p670" } */ + +void +foo (char *a, char *b, int n) +{ + for (int i = 0; i < n; i++) +a[i] = b[i] + 1; +} + +/* { dg-final { scan-assembler "vset" } } */ +/* { dg-final { scan-assembler "zvl128b" } } */
[gcc r16-843] RISC-V: Fix some dynamic LMUL costing.
https://gcc.gnu.org/g:1eb30ea5e36a0cf01e356337f5fabcd3e05fd373 commit r16-843-g1eb30ea5e36a0cf01e356337f5fabcd3e05fd373 Author: Robin Dapp Date: Fri Feb 7 15:42:28 2025 +0100 RISC-V: Fix some dynamic LMUL costing. With all-SLP we annotate statements slightly differently. This patch uses STMT_VINFO_RELEVANT_P in order to walk through potential program points. Also it makes the LMUL estimate always use the same path. This helps fix a number of test cases that regressed since GCC 14. There are still some failing ones but it appears to me that the chosen LMUL is still correct and we just expect different log messages. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (compute_estimated_lmul): Always use vect_vf_for_cost and TARGET_MIN_VLEN. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Adjust expectations. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c: Ditto. Diff: --- gcc/config/riscv/riscv-vector-costs.cc | 25 ++ .../vect/costmodel/riscv/rvv/dynamic-lmul4-3.c | 2 +- .../vect/costmodel/riscv/rvv/dynamic-lmul8-9.c | 1 - 3 files changed, 3 insertions(+), 25 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index c28eecd1110e..a39b611e4cef 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -205,9 +205,7 @@ compute_local_program_points ( if (!is_gimple_assign_or_call (gsi_stmt (si))) continue; stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); - enum stmt_vec_info_type type - = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info)); - if (type != undef_vec_info_type) + if (STMT_VINFO_RELEVANT_P (stmt_info)) { stmt_point info = {point, gsi_stmt (si), stmt_info}; program_points.safe_push (info); @@ -626,7 +624,7 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode) int regno_alignment = riscv_get_v_regno_alignment (loop_vinfo->vector_mode); if (riscv_v_ext_vls_mode_p (loop_vinfo->vector_mode)) return 
regno_alignment; - else if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U)) + else { int estimated_vf = vect_vf_for_cost (loop_vinfo); int estimated_lmul = estimated_vf * GET_MODE_BITSIZE (mode).to_constant () @@ -636,25 +634,6 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode) else return estimated_lmul; } - else -{ - /* Estimate the VLA SLP LMUL. */ - if (regno_alignment > RVV_M1) - return regno_alignment; - else if (mode != QImode - || LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo).is_constant ()) - { - int ratio; - if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR, - GET_MODE_SIZE (loop_vinfo->vector_mode), &ratio)) - { - if (ratio == 1) - return RVV_M4; - else if (ratio == 2) - return RVV_M2; - } - } -} return 0; } diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c index 85e3021f1c2b..b5a7f180228a 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c @@ -45,4 +45,4 @@ void foo2 (int64_t *__restrict a, /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 3 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c index 793d16418bf1..56234580fa82 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c @@ -14,6 +14,5 @@ foo (int64_t *__restrict a, int64_t init, int n) /* { dg-final { scan-assembler {e64,m8} } } */ /* { dg-final { 
scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
[gcc r16-844] RISC-V: Default-initialize variable.
https://gcc.gnu.org/g:e6fea0a8be95e2a1cbe733301a205b8c00db9202 commit r16-844-ge6fea0a8be95e2a1cbe733301a205b8c00db9202 Author: Robin Dapp Date: Thu May 8 10:17:26 2025 +0200 RISC-V: Default-initialize variable. This patch initializes saved_vxrm_mode to VXRM_MODE_NONE. Leaving it uninitialized triggers a warning (but no error) when building the compiler, so better fix it. gcc/ChangeLog: * config/riscv/riscv.cc (singleton_vxrm_need): Init saved_vxrm_mode. Diff: --- gcc/config/riscv/riscv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 358d1ec5d32e..5c0c8beec3bd 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -12321,7 +12321,7 @@ singleton_vxrm_need (void) /* Walk the IL noting if VXRM is needed and if there's more than one mode needed. */ bool found = false; - int saved_vxrm_mode; + int saved_vxrm_mode = VXRM_MODE_NONE; for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) { if (!INSN_P (insn) || DEBUG_INSN_P (insn))
[gcc r16-937] vect: Remove non-SLP paths in strided slp/elementwise.
https://gcc.gnu.org/g:0f73ae763c02ad3b2917c33eecba9b47efdcc73b commit r16-937-g0f73ae763c02ad3b2917c33eecba9b47efdcc73b Author: Robin Dapp Date: Tue May 20 11:23:34 2025 +0200 vect: Remove non-SLP paths in strided slp/elementwise. This patch removes non-SLP paths in the VMAT_STRIDED_SLP/VMAT_ELEMENTWISE part of vectorizable_load. gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_load): Remove non-SLP paths. Diff: --- gcc/tree-vect-stmts.cc | 49 ++--- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 66958543bf86..3710694ac75d 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -10689,8 +10689,7 @@ vectorizable_load (vec_info *vinfo, first_dr_info = dr_info; } - if (slp && grouped_load - && memory_access_type == VMAT_STRIDED_SLP) + if (grouped_load && memory_access_type == VMAT_STRIDED_SLP) { group_size = DR_GROUP_SIZE (first_stmt_info); ref_type = get_group_alias_ptr_type (first_stmt_info); @@ -10830,22 +10829,20 @@ vectorizable_load (vec_info *vinfo, ltype = build_aligned_type (ltype, align * BITS_PER_UNIT); } - if (slp) + /* For SLP permutation support we need to load the whole group, +not only the number of vector stmts the permutation result +fits in. */ + if (slp_perm) { - /* For SLP permutation support we need to load the whole group, -not only the number of vector stmts the permutation result -fits in. */ - if (slp_perm) - { - /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for -variable VF. */ - unsigned int const_vf = vf.to_constant (); - ncopies = CEIL (group_size * const_vf, const_nunits); - dr_chain.create (ncopies); - } - else - ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for +variable VF. 
*/ + unsigned int const_vf = vf.to_constant (); + ncopies = CEIL (group_size * const_vf, const_nunits); + dr_chain.create (ncopies); } + else + ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + unsigned int group_el = 0; unsigned HOST_WIDE_INT elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); @@ -10883,14 +10880,13 @@ vectorizable_load (vec_info *vinfo, CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_temp); group_el += lnel; - if (! slp - || group_el == group_size) + if (group_el == group_size) { n_groups++; /* When doing SLP make sure to not load elements from the next vector iteration, those will not be accessed so just use the last element again. See PR107451. */ - if (!slp || known_lt (n_groups, vf)) + if (known_lt (n_groups, vf)) { tree newoff = copy_ssa_name (running_off); gimple *incr @@ -10938,19 +10934,10 @@ vectorizable_load (vec_info *vinfo, if (!costing_p) { - if (slp) - { - if (slp_perm) - dr_chain.quick_push (gimple_assign_lhs (new_stmt)); - else - slp_node->push_vec_def (new_stmt); - } + if (slp_perm) + dr_chain.quick_push (gimple_assign_lhs (new_stmt)); else - { - if (j == 0) - *vec_stmt = new_stmt; - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); - } + slp_node->push_vec_def (new_stmt); } } if (slp_perm)
[gcc r16-936] RISC-V: Avoid division by zero in check_builtin_call [PR120436].
https://gcc.gnu.org/g:6ccf845d9fa157e7ebcf2c589a9fc5d8f298961f commit r16-936-g6ccf845d9fa157e7ebcf2c589a9fc5d8f298961f Author: Robin Dapp Date: Mon May 26 16:16:36 2025 +0200 RISC-V: Avoid division by zero in check_builtin_call [PR120436]. In check_builtin_call we eventually perform a division by zero when no vector modes are present. This patch just avoids the division in that case. PR target/120436 gcc/ChangeLog: * config/riscv/riscv-vector-builtins-shapes.cc (struct vset_def): Avoid division by zero. (struct vget_def): Ditto. * config/riscv/riscv-vector-builtins.h (struct function_group_info): Use required_extensions_specified instead of duplicating code. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr120436.c: New test. Diff: --- gcc/config/riscv/riscv-vector-builtins-shapes.cc | 4 +++ gcc/config/riscv/riscv-vector-builtins.h | 40 +- gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c | 16 + 3 files changed, 21 insertions(+), 39 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index b855d4c5fa5a..9832eb9e3d1b 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -908,6 +908,8 @@ struct vset_def : public build_base { poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0)); poly_int64 inner_size = GET_MODE_SIZE (c.arg_mode (2)); +if (maybe_eq (inner_size, 0)) + return false; unsigned int nvecs = exact_div (outer_size, inner_size).to_constant (); return c.require_immediate (1, 0, nvecs - 1); } @@ -920,6 +922,8 @@ struct vget_def : public misc_def { poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0)); poly_int64 inner_size = GET_MODE_SIZE (c.ret_mode ()); +if (maybe_eq (inner_size, 0)) + return false; unsigned int nvecs = exact_div (outer_size, inner_size).to_constant (); return c.require_immediate (1, 0, nvecs - 1); } diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h 
index ffc289364b06..1f2587ab6afa 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -331,45 +331,7 @@ struct function_group_info /* Return true if required extension is enabled */ bool match (required_ext ext_value) const { -switch (ext_value) -{ - case VECTOR_EXT: -return TARGET_VECTOR; - case ZVBB_EXT: -return TARGET_ZVBB; - case ZVBB_OR_ZVKB_EXT: -return (TARGET_ZVBB || TARGET_ZVKB); - case ZVBC_EXT: -return TARGET_ZVBC; - case ZVKG_EXT: -return TARGET_ZVKG; - case ZVKNED_EXT: -return TARGET_ZVKNED; - case ZVKNHA_OR_ZVKNHB_EXT: -return (TARGET_ZVKNHA || TARGET_ZVKNHB); - case ZVKNHB_EXT: -return TARGET_ZVKNHB; - case ZVKSED_EXT: -return TARGET_ZVKSED; - case ZVKSH_EXT: -return TARGET_ZVKSH; - case XTHEADVECTOR_EXT: - return TARGET_XTHEADVECTOR; - case ZVFBFMIN_EXT: - return TARGET_ZVFBFMIN; - case ZVFBFWMA_EXT: - return TARGET_ZVFBFWMA; - case XSFVQMACCQOQ_EXT: - return TARGET_XSFVQMACCQOQ; - case XSFVQMACCDOD_EXT: - return TARGET_XSFVQMACCDOD; - case XSFVFNRCLIPXFQF_EXT: - return TARGET_XSFVFNRCLIPXFQF; - case XSFVCP_EXT: - return TARGET_XSFVCP; - default: -gcc_unreachable (); -} +return required_extensions_specified (ext_value); } /* The base name, as a string. */ const char *base_name; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c new file mode 100644 index ..d22091e59490 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr120436.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O0" } */ + +/* Use -O0 as otherwise the unused intrinsics get + optimized away. We used to ICE here instead of + emitting an error. 
*/ + +#include "riscv_vector.h" + +void +clean_subreg (int32_t *in, int32_t *out, size_t m) /* { dg-error {this operation requires the RVV ISA extension} } */ +{ + vint16m8_t v24, v8, v16; + vint32m8_t result = __riscv_vle32_v_i32m8 (in, 32); /* { dg-error {built-in function '__riscv_vle32_v_i32m8\(in, 32\)' requires the 'v' ISA extension} } */ + vint32m1_t v0 = __riscv_vget_v_i32m8_i32m1 (result, 0); +}
[gcc r16-2102] RISC-V: Do not use vsetivli for THeadVector.
https://gcc.gnu.org/g:dab5951af891c58aa4dd60755edb49df882b680a commit r16-2102-gdab5951af891c58aa4dd60755edb49df882b680a Author: Robin Dapp Date: Tue Jul 8 11:35:12 2025 +0200 RISC-V: Do not use vsetivli for THeadVector. In emit_vlmax_insn_lra we use a vsetivli for an immediate AVL. XTHeadVector does not support this, so guard appropriately. PR target/120461 gcc/ChangeLog: * config/riscv/riscv-v.cc (emit_vlmax_insn_lra): Do not emit vsetivli for XTHeadVector. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/xtheadvector/pr120461.c: New test. Diff: --- gcc/config/riscv/riscv-v.cc| 2 +- gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr120461.c | 6 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index a5ab8dd4e2fe..22d194909cfa 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -408,7 +408,7 @@ emit_vlmax_insn_lra (unsigned icode, unsigned insn_flags, rtx *ops, rtx vl) gcc_assert (!can_create_pseudo_p ()); machine_mode mode = GET_MODE (ops[0]); - if (imm_avl_p (mode)) + if (imm_avl_p (mode) && !TARGET_XTHEADVECTOR) { /* Even though VL is a real hardreg already allocated since it is post-RA now, we still gain benefits that we emit diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr120461.c b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr120461.c new file mode 100644 index ..69391570970f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr120461.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=xt-c920 -mrvv-vector-bits=zvl -fzero-call-used-regs=all" */ + +void +foo () +{}
[gcc r16-2101] RISC-V: Ignore non-types in builtin function hash.
https://gcc.gnu.org/g:6968656d631c081889f30c2247bf255e0831 commit r16-2101-g6968656d631c081889f30c2247bf255e0831 Author: Robin Dapp Date: Tue Jul 8 11:17:41 2025 +0200 RISC-V: Ignore non-types in builtin function hash. If a user passes a string that doesn't represent a variable we still try to compute a hash for its type. Its tree does not represent a type, though, but just an exceptional node. This patch just ignores it, leaving the error to the checking code later. PR target/113829 gcc/ChangeLog: * config/riscv/riscv-vector-builtins.cc (registered_function::overloaded_hash): Skip non-type arguments. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr113829.c: New test. Diff: --- gcc/config/riscv/riscv-vector-builtins.cc | 6 ++ gcc/testsuite/gcc.target/riscv/rvv/base/pr113829.c | 10 ++ 2 files changed, 16 insertions(+) diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index f652a125dc35..8810af0d9ccb 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -4977,6 +4977,12 @@ registered_function::overloaded_hash () const for (unsigned int i = 0; i < argument_types.length (); i++) { type = argument_types[i]; + + /* If we're passed something entirely unreasonable, just ignore here. +We'll warn later anyway. */ + if (TREE_CODE_CLASS (TREE_CODE (type)) != tcc_type) + continue; + unsigned_p = POINTER_TYPE_P (type) ? TYPE_UNSIGNED (TREE_TYPE (type)) : TYPE_UNSIGNED (type); mode_p = POINTER_TYPE_P (type) ? 
TYPE_MODE (TREE_TYPE (type)) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr113829.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr113829.c new file mode 100644 index ..48c291a92026 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr113829.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gcv -mabi=lp64d" } */ + +#pragma riscv intrinsic "vector" +void +foo (void) +{ + __riscv_vfredosum_tu (X); /* { dg-error "undeclared" } */ + /* { dg-error "too many arguments" "" { target *-*-* } .-1 } */ +}
[gcc r16-2174] RISC-V: Make zero-stride load broadcast a tunable.
https://gcc.gnu.org/g:dcba959fb30dc250eeb6fdd05aa878e5f1fc8c2d commit r16-2174-gdcba959fb30dc250eeb6fdd05aa878e5f1fc8c2d Author: Robin Dapp Date: Thu Jul 10 09:41:48 2025 +0200 RISC-V: Make zero-stride load broadcast a tunable. This patch makes the zero-stride load broadcast idiom dependent on a uarch-tunable "use_zero_stride_load". Right now we have quite a few paths that reach a strided load and some of them are not exactly straightforward. While broadcast is relatively rare on rv64 targets it is more common on rv32 targets that want to vectorize 64-bit elements. While the patch is more involved than I would have liked it could have even touched more places. The whole broadcast-like insn path feels a bit hackish due to the several optimizations we employ. Some of the complications stem from the fact that we lump together real broadcasts, vector single-element sets, and strided broadcasts. The strided-load alternatives currently require a memory_constraint to work properly which causes more complications when trying to disable just these. In short, the whole pred_broadcast handling in combination with the sew64_scalar_helper could use work in the future. I was about to start with it in this patch but soon realized that it would only distract from the original intent. What can help in the future is to split strided and non-strided broadcast entirely, as well as the single-element sets. It is still unclear whether we need to pay special attention to misaligned strided loads (PR120782). I regtested on rv32 and rv64 with strided_load_broadcast_p forced to true and false. With either I didn't observe any new execution failures but obviously there are new scan failures with strided broadcast turned off. PR target/118734 gcc/ChangeLog: * config/riscv/constraints.md (Wdm): Use tunable for Wdm constraint. * config/riscv/riscv-protos.h (emit_avltype_insn): Declare. (can_be_broadcasted_p): Rename to... (can_be_broadcast_p): ...this. 
* config/riscv/predicates.md: Use renamed function. (strided_load_broadcast_p): Declare. * config/riscv/riscv-selftests.cc (run_broadcast_selftests): Only run broadcast selftest if strided broadcasts are OK. * config/riscv/riscv-v.cc (emit_avltype_insn): New function. (sew64_scalar_helper): Only emit a pred_broadcast if the new tunable says so. (can_be_broadcasted_p): Rename to... (can_be_broadcast_p): ...this and use new tunable. * config/riscv/riscv.cc (struct riscv_tune_param): Add strided broad tunable. (strided_load_broadcast_p): Implement. * config/riscv/vector.md: Use strided_load_broadcast_p () and work around 64-bit broadcast on rv32 targets. Diff: --- gcc/config/riscv/constraints.md | 7 ++-- gcc/config/riscv/predicates.md | 2 +- gcc/config/riscv/riscv-protos.h | 4 ++- gcc/config/riscv/riscv-selftests.cc | 10 -- gcc/config/riscv/riscv-v.cc | 58 +++- gcc/config/riscv/riscv.cc | 20 +++ gcc/config/riscv/vector.md | 66 +++-- 7 files changed, 133 insertions(+), 34 deletions(-) diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md index ccab1a2e29df..5ecaa19eb014 100644 --- a/gcc/config/riscv/constraints.md +++ b/gcc/config/riscv/constraints.md @@ -237,10 +237,11 @@ (and (match_code "const_vector") (match_test "rtx_equal_p (op, riscv_vector::gen_scalar_move_mask (GET_MODE (op)))"))) -(define_memory_constraint "Wdm" +(define_constraint "Wdm" "Vector duplicate memory operand" - (and (match_code "mem") - (match_code "reg" "0"))) + (and (match_test "strided_load_broadcast_p ()") + (and (match_code "mem") + (match_code "reg" "0" ;; Vendor ISA extension constraints. diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 8baad2fae7a9..1f9a6b562e53 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -617,7 +617,7 @@ ;; The scalar operand can be directly broadcast by RVV instructions. 
(define_predicate "direct_broadcast_operand" - (match_test "riscv_vector::can_be_broadcasted_p (op)")) + (match_test "riscv_vector::can_be_broadcast_p (op)")) ;; A CONST_INT operand that has exactly two bits cleared. (define_predicate "const_nottwobits_operand" diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 38f63ea84248..a41c4c299fac 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -604,6 +604,7 @@ void emit_vlmax_vsetvl (machine_mode, rtx); void emit_hard_vlmax_vsetvl (machine_mode, rtx);
[gcc r16-2175] expand: ICE if asked to expand RDIV with non-float type.
https://gcc.gnu.org/g:5aa21765236730c1772c19454cbb71365b84d583 commit r16-2175-g5aa21765236730c1772c19454cbb71365b84d583 Author: Robin Dapp Date: Wed Jul 9 15:58:05 2025 +0200 expand: ICE if asked to expand RDIV with non-float type. This patch adds asserts that ensure we only expand an RDIV_EXPR with actual float mode. It also replaces the RDIV_EXPR in setting a vectorized loop's length by EXACT_DIV_EXPR. The code in question is only used with length-control targets (riscv, powerpc, s390). PR target/121014 gcc/ChangeLog: * cfgexpand.cc (expand_debug_expr): Assert FLOAT_MODE_P. * optabs-tree.cc (optab_for_tree_code): Assert FLOAT_TYPE_P. * tree-vect-loop.cc (vect_get_loop_len): Use EXACT_DIV_EXPR. Diff: --- gcc/cfgexpand.cc | 2 ++ gcc/optabs-tree.cc| 2 ++ gcc/tree-vect-loop.cc | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index 33649d43f71c..a656ccebf176 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -5358,6 +5358,8 @@ expand_debug_expr (tree exp) return simplify_gen_binary (MULT, mode, op0, op1); case RDIV_EXPR: + gcc_assert (FLOAT_MODE_P (mode)); + /* Fall through. 
*/ case TRUNC_DIV_EXPR: case EXACT_DIV_EXPR: if (unsignedp) diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc index 6dfe8ee4c4e4..9308a6dfd65c 100644 --- a/gcc/optabs-tree.cc +++ b/gcc/optabs-tree.cc @@ -82,6 +82,8 @@ optab_for_tree_code (enum tree_code code, const_tree type, return unknown_optab; /* FALLTHRU */ case RDIV_EXPR: + gcc_assert (FLOAT_TYPE_P (type)); + /* FALLTHRU */ case TRUNC_DIV_EXPR: case EXACT_DIV_EXPR: if (TYPE_SATURATING (type)) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 8ea0f45d79fc..56f80db57bbc 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -11079,7 +11079,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, factor = exact_div (nunits1, nunits2).to_constant (); tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); gimple_seq seq = NULL; - loop_len = gimple_build (&seq, RDIV_EXPR, iv_type, loop_len, + loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len, build_int_cst (iv_type, factor)); if (seq) gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
[gcc r16-2285] RISC-V: Fix vsetvl merge rule.
https://gcc.gnu.org/g:9041f2bff8202d9b87d8c27f21e4ffa8d50b36a3 commit r16-2285-g9041f2bff8202d9b87d8c27f21e4ffa8d50b36a3 Author: Robin Dapp Date: Mon Jul 14 13:53:12 2025 +0200 RISC-V: Fix vsetvl merge rule. In PR120297 we fuse vsetvl e8,mf2,... vsetvl e64,m1,... into vsetvl e64,m4,... Individually, that's ok but we also change the new vsetvl's demand to "SEW only" even though the first original one demanded SEW >= 8 and ratio = 16. As we forget the ratio after the merge we find that the vsetvl following the merged one has ratio = 64 demand and we fuse into vsetvl e64,m1,.. which obviously doesn't have ratio = 16 any more. Regtested on rv64gcv_zvl512b. PR target/120297 gcc/ChangeLog: * config/riscv/riscv-vsetvl.def: Do not forget ratio demand of previous vsetvl. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/pr120297.c: New test. Diff: --- gcc/config/riscv/riscv-vsetvl.def | 6 ++-- gcc/testsuite/gcc.target/riscv/rvv/pr120297.c | 50 +++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.def b/gcc/config/riscv/riscv-vsetvl.def index d7a5ada772d0..0f999d2276d4 100644 --- a/gcc/config/riscv/riscv-vsetvl.def +++ b/gcc/config/riscv/riscv-vsetvl.def @@ -79,7 +79,7 @@ DEF_SEW_LMUL_RULE (sew_only, sew_only, sew_only, sew_eq_p, sew_eq_p, nop) DEF_SEW_LMUL_RULE (sew_only, ge_sew, sew_only, sew_ge_and_prev_sew_le_next_max_sew_p, sew_ge_p, nop) DEF_SEW_LMUL_RULE ( - sew_only, ratio_and_ge_sew, sew_lmul, + sew_only, ratio_and_ge_sew, ratio_and_ge_sew, sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p, always_false, modify_lmul_with_next_ratio) @@ -104,9 +104,9 @@ DEF_SEW_LMUL_RULE (ratio_and_ge_sew, sew_lmul, sew_lmul, DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ratio_only, ratio_and_ge_sew, ratio_eq_p, ratio_eq_p, use_max_sew_and_lmul_with_prev_ratio) DEF_SEW_LMUL_RULE ( - ratio_and_ge_sew, sew_only, sew_only, + ratio_and_ge_sew, sew_only, ratio_and_ge_sew, 
sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p, - always_false, use_next_sew_with_prev_ratio) + sew_eq_p, use_next_sew_with_prev_ratio) DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ge_sew, ratio_and_ge_sew, max_sew_overlap_and_prev_ratio_valid_for_next_sew_p, sew_ge_p, use_max_sew_and_lmul_with_prev_ratio) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/pr120297.c b/gcc/testsuite/gcc.target/riscv/rvv/pr120297.c new file mode 100644 index ..3d1845d0fe66 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/pr120297.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fwhole-program" } */ + +unsigned a; +short c; +char d; +unsigned long e; +_Bool f[10][10]; +unsigned g[10]; +long long ak; +char i = 7; +long long t[10]; +short x[10][10][10][10]; +short y[10][10][10][10]; + +void +h (char i, long long t[], short x[][10][10][10], short y[][10][10][10], + _Bool aa) +{ + for (int j = 2; j < 8; j += 2) +{ + for (short k = 0; k < 10; k++) + { + for (int l = 3; l < 8; l += 2) + a = x[1][j][k][l]; + c = x[c][1][1][c]; + } + for (int k = 0; k < 10; k++) + { + f[2][k] |= (_Bool) t[c]; + g[c] = t[c + 1]; + d += y[j][1][k][k]; + e = e > i ? e : i; + } +} +} + +int +main () +{ + t[c] = 1; + h (i, t, x, y, a); + for (int j = 0; j < 10; ++j) +for (int k = 0; k < 10; ++k) + ak ^= f[j][k] + 238516665 + (ak >> 2); + ak ^= g[c] + 238516665 + (ak >> 2); + if (ak != 234635118ull) +__builtin_abort (); +}
[gcc r16-2286] expand: Allow fixed-point arithmetic for RDIV_EXPR.
https://gcc.gnu.org/g:4648fe556e26d54abfc863ebb2ed046f1a260fdb commit r16-2286-g4648fe556e26d54abfc863ebb2ed046f1a260fdb Author: Robin Dapp Date: Tue Jul 15 10:55:36 2025 +0200 expand: Allow fixed-point arithmetic for RDIV_EXPR. r16-2175-g5aa21765236730 introduced an assert for floating-point modes when expanding an RDIV_EXPR but forgot fixed-point modes. This patch adds ALL_FIXED_POINT_MODE_P to the assert. PR middle-end/121065 gcc/ChangeLog: * cfgexpand.cc (expand_debug_expr): Allow fixed-point modes for RDIV_EXPR. * optabs-tree.cc (optab_for_tree_code): Ditto. gcc/testsuite/ChangeLog: * gcc.target/arm/pr121065.c: New test. Diff: --- gcc/cfgexpand.cc| 3 ++- gcc/optabs-tree.cc | 3 ++- gcc/testsuite/gcc.target/arm/pr121065.c | 11 +++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index a656ccebf176..8a55f4f472a2 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -5358,7 +5358,8 @@ expand_debug_expr (tree exp) return simplify_gen_binary (MULT, mode, op0, op1); case RDIV_EXPR: - gcc_assert (FLOAT_MODE_P (mode)); + gcc_assert (FLOAT_MODE_P (mode) + || ALL_FIXED_POINT_MODE_P (mode)); /* Fall through. 
*/ case TRUNC_DIV_EXPR: case EXACT_DIV_EXPR: diff --git a/gcc/optabs-tree.cc b/gcc/optabs-tree.cc index 9308a6dfd65c..0de74c7966af 100644 --- a/gcc/optabs-tree.cc +++ b/gcc/optabs-tree.cc @@ -82,7 +82,8 @@ optab_for_tree_code (enum tree_code code, const_tree type, return unknown_optab; /* FALLTHRU */ case RDIV_EXPR: - gcc_assert (FLOAT_TYPE_P (type)); + gcc_assert (FLOAT_TYPE_P (type) + || ALL_FIXED_POINT_MODE_P (TYPE_MODE (type))); /* FALLTHRU */ case TRUNC_DIV_EXPR: case EXACT_DIV_EXPR: diff --git a/gcc/testsuite/gcc.target/arm/pr121065.c b/gcc/testsuite/gcc.target/arm/pr121065.c new file mode 100644 index ..dfc6059a46d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr121065.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=cortex-m55" } */ + +_Accum sa; +char c; + +void +div_csa () +{ + c /= sa; +}
[gcc r16-2451] RISC-V: testsuite: Fix vx_vf_*run-1-f16.c run tests.
https://gcc.gnu.org/g:4b62bd519a852af8f69b077ba7b0ed531f4a14c5 commit r16-2451-g4b62bd519a852af8f69b077ba7b0ed531f4a14c5 Author: Robin Dapp Date: Mon Jul 21 15:32:09 2025 +0200 RISC-V: testsuite: Fix vx_vf_*run-1-f16.c run tests. This patch fixes the vf_vfmacc-run-1-f16.c test failures on rv32 by adding zvfh requirements as well as options to the test and the target harness. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c: Add zvfh requirements and options. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmacc-run-1-f16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsac-run-1-f16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f16.c: Ditto. * lib/target-supports.exp: Add zvfh options. 
Diff: --- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmacc-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmadd-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsac-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfnmsub-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmacc-run-1-f16.c | 6 +- .../gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmsac-run-1-f16.c | 6 +- gcc/testsuite/lib/target-supports.exp | 8 11 files changed, 58 insertions(+), 10 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c index 982dd9736acc..fd8aa30be17a 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmacc-run-1-f16.c @@ -1,5 +1,9 @@ /* { dg-do run { target { riscv_v } } } */ -/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-require-effective-target riscv_zvfh } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvfh" } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ #include "vf_mulop.h" #include "vf_mulop_data.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c index 400bbcd1d79f..8fd855288993 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c @@ -1,5 +1,9 @@ /* { dg-do run { target { riscv_v } } } */ -/* { dg-additional-options 
"-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-require-effective-target riscv_zvfh } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvfh" } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ #include "vf_mulop.h" #include "vf_mulop_data.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c index 21c1860c0f1c..e91fd15a5b73 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsac-run-1-f16.c @@ -1,5 +1,9 @@ /* { dg-do run { target { riscv_v } } } */ -/* { dg-additional-options "-march=rv64gcv_zvfh --param=fpr2vr-cost=0" } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-require-effective-target riscv_zvfh } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvfh" } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ #include "vf_mulop.h" #include "vf_mulop_data.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c index 163b5bd21b42..ca7e0db17b5b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c +++ b/gcc/testsuite/gcc.targe
[gcc r16-2452] RISC-V: Rework broadcast handling [PR121073].
https://gcc.gnu.org/g:f796f819c35cc08e0a6e7242aee92eb96f62a3b5 commit r16-2452-gf796f819c35cc08e0a6e7242aee92eb96f62a3b5 Author: Robin Dapp Date: Thu Jul 17 11:09:43 2025 +0200 RISC-V: Rework broadcast handling [PR121073]. During the last weeks it became clear that our current broadcast handling needs an overhaul in order to improve maintainability. PR121073 showed that my intermediate fix wasn't enough and caused regressions. This patch now takes a first step towards untangling broadcast (vmv.v.x), "set first" (vmv.s.x), and zero-strided load (vlse). Also can_be_broadcast_p is rewritten and strided_broadcast_p is introduced to make the distinction clear directly in the predicates. Due to the pervasiveness of the patterns I needed to touch a lot of places and tried to clear up some things while at it. The patch therefore also introduces new helpers expand_broadcast for vmv.v.x that dispatches to regular as well as strided broadcast and expand_set_first that does the same thing for vmv.s.x. The non-strided fallbacks are now implemented as splitters of the strided variants. This makes it easier to see where and when things happen. The test cases I touched appeared wrong to me so this patch sets a new baseline for some of the scalar_move tests. There is still work to be done but IMHO that can be deferred: It would be clearer if the three broadcast-like variants differed not just in name but also in RTL pattern so matching is not as confusing. Right now vmv.v.x and vmv.s.x only differ in the mask and are interchangeable by just changing it from "all ones" to a "single one". As last time, I regtested on rv64 and rv32 with strided_broadcast turned on and off. Note there are regressions in cond_fma_fnma-[78].c. Those are due to the patch exposing more fwprop/late-combine opportunities. For fma/fnma we don't yet have proper costing for vv/vx in place but I'll expect that to be addressed soon and figured we can live with those for the time being. 
PR target/121073 gcc/ChangeLog: * config/riscv/autovec-opt.md: Use new helpers. * config/riscv/autovec.md: Ditto. * config/riscv/predicates.md (strided_broadcast_mask_operand): New predicate. (strided_broadcast_operand): Ditto. (any_broadcast_operand): Ditto. * config/riscv/riscv-protos.h (expand_broadcast): Declare. (expand_set_first): Ditto. (expand_set_first_tu): Ditto. (strided_broadcast_p): Ditto. * config/riscv/riscv-string.cc (expand_vec_setmem): Use new helpers. * config/riscv/riscv-v.cc (expand_broadcast): New function. (expand_set_first): Ditto. (expand_set_first_tu): Ditto. (expand_const_vec_duplicate): Use new helpers. (expand_const_vector_duplicate_repeating): Ditto. (expand_const_vector_duplicate_default): Ditto. (sew64_scalar_helper): Ditto. (expand_vector_init_merge_repeating_sequence): Ditto. (expand_reduction): Ditto. (strided_broadcast_p): New function. (whole_reg_to_reg_move_p): Use new helpers. * config/riscv/riscv-vector-builtins-bases.cc: Use either broadcast or strided broadcast. * config/riscv/riscv-vector-builtins.cc (function_expander::use_ternop_insn): Ditto. (function_expander::use_widen_ternop_insn): Ditto. (function_expander::use_scalar_broadcast_insn): Ditto. * config/riscv/riscv-vector-builtins.h: Declare scalar broadcast. * config/riscv/vector.md (*pred_broadcast): Split into regular and strided broadcast. (*pred_broadcast_zvfh): Split. (pred_broadcast_zvfh): Ditto. (*pred_broadcast_zvfhmin): Ditto. (@pred_strided_broadcast): Ditto. (*pred_strided_broadcast): Ditto. (*pred_strided_broadcast_zvfhmin): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls-vlmax/repeat-6.c: Adjust test expectation. * gcc.target/riscv/rvv/base/scalar_move-5.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto. * gcc.target/riscv/rvv/base/scalar_move-9.c: Ditto. * gcc.target/riscv/rvv/pr121073.c: New test. 
Diff: --- gcc/config/riscv/autovec-opt.md| 3 +- gcc/config/riscv/autovec.md| 7 +- gcc/config/riscv/predicates.md | 13 + gcc/config/riscv/riscv-protos.h| 4 + gcc/config/riscv/riscv-st
[gcc r16-2482] vect: Misalign checks for gather/scatter.
https://gcc.gnu.org/g:83afbe552e6b0baff37210dd1f344cfee4b26a2f commit r16-2482-g83afbe552e6b0baff37210dd1f344cfee4b26a2f Author: Robin Dapp Date: Thu Jul 3 11:04:29 2025 +0200 vect: Misalign checks for gather/scatter. This patch adds simple misalignment checks for gather/scatter operations. Previously, we assumed that those perform element accesses internally so alignment does not matter. The riscv vector spec however explicitly states that vector operations are allowed to fault on element-misaligned accesses. Reasonable uarchs won't, but... For gather/scatter we have two paths in the vectorizer: (1) Regular analysis based on datarefs. Here we can also create strided loads. (2) Non-affine access where each gather index is relative to the initial address. The assumption this patch works on is that once the alignment for the first scalar is correct, all others will fall in line, as the index is always a multiple of the first element's size. For (1) we have a dataref and can check it for alignment as in other cases. For (2) this patch checks the object alignment of BASE and compares it against the natural alignment of the current vectype's unit. The patch also adds a pointer argument to the gather/scatter IFNs that contains the necessary alignment. Most of the patch is thus mechanical in that it merely adjusts indices. I tested the riscv version with a custom qemu version that faults on element-misaligned vector accesses. With this patch applied, there is just a single fault left, which is due to PR120782 and which will be addressed separately. Bootstrapped and regtested on x86 and aarch64. Regtested on rv64gcv_zvl512b with and without unaligned vector support. gcc/ChangeLog: * internal-fn.cc (internal_fn_len_index): Adjust indices for new alias_ptr param. (internal_fn_else_index): Ditto. (internal_fn_mask_index): Ditto. (internal_fn_stored_value_index): Ditto. (internal_fn_alias_ptr_index): Ditto. (internal_fn_offset_index): Ditto. (internal_fn_scale_index): Ditto. 
(internal_gather_scatter_fn_supported_p): Ditto. * internal-fn.h (internal_fn_alias_ptr_index): Ditto. * optabs-query.cc (supports_vec_gather_load_p): Ditto. * tree-vect-data-refs.cc (vect_check_gather_scatter): Add alias pointer. * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Add alias pointer. * tree-vect-slp.cc (vect_get_operand_map): Adjust for alias pointer. * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Add alias pointer and misalignment handling. (get_load_store_type): Move from here... (get_group_load_store_type): ...To here. (vectorizable_store): Add alias pointer. (vectorizable_load): Ditto. * tree-vectorizer.h (struct gather_scatter_info): Ditto. Diff: --- gcc/internal-fn.cc | 43 --- gcc/internal-fn.h | 1 + gcc/optabs-query.cc| 6 +- gcc/tree-vect-data-refs.cc | 61 +++ gcc/tree-vect-patterns.cc | 17 +++-- gcc/tree-vect-slp.cc | 16 ++-- gcc/tree-vect-stmts.cc | 179 + gcc/tree-vectorizer.h | 7 +- 8 files changed, 206 insertions(+), 124 deletions(-) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 1411f4497892..bf2fac818070 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4967,11 +4967,13 @@ internal_fn_len_index (internal_fn fn) return 2; case IFN_MASK_LEN_SCATTER_STORE: + return 6; + case IFN_MASK_LEN_STRIDED_LOAD: return 5; case IFN_MASK_LEN_GATHER_LOAD: - return 6; + return 7; case IFN_COND_LEN_FMA: case IFN_COND_LEN_FMS: @@ -5075,7 +5077,7 @@ internal_fn_else_index (internal_fn fn) case IFN_MASK_GATHER_LOAD: case IFN_MASK_LEN_GATHER_LOAD: - return 5; + return 6; default: return -1; @@ -5110,7 +5112,7 @@ internal_fn_mask_index (internal_fn fn) case IFN_MASK_SCATTER_STORE: case IFN_MASK_LEN_GATHER_LOAD: case IFN_MASK_LEN_SCATTER_STORE: - return 4; + return 5; case IFN_VCOND_MASK: case IFN_VCOND_MASK_LEN: @@ -5135,10 +5137,11 @@ internal_fn_stored_value_index (internal_fn fn) case IFN_MASK_STORE: case IFN_MASK_STORE_LANES: + return 3; case IFN_SCATTER_STORE: case IFN_MASK_SCATTER_STORE: case 
IFN_MASK_LEN_SCATTER_STORE: - return 3; + return 4; case IFN_LEN_STORE: return 4; @@ -5152,6 +5155,28 @@ internal_fn_stored_value_index (internal_fn fn) } } +/* If FN has an alias pointer return its index, otherwise return -
[gcc r16-2480] vect: Add helper macros for gather/scatter.
https://gcc.gnu.org/g:d581d84f919cd269b1fd153d9af51eb9efd10d82 commit r16-2480-gd581d84f919cd269b1fd153d9af51eb9efd10d82 Author: Robin Dapp Date: Thu Jul 3 11:04:25 2025 +0200 vect: Add helper macros for gather/scatter. This encapsulates the IFN and the builtin-function way of handling gather/scatter via three defines: GATHER_SCATTER_IFN_P GATHER_SCATTER_LEGACY_P GATHER_SCATTER_EMULATED_P and introduces a helper define for SLP operand handling as well. gcc/ChangeLog: * tree-vect-slp.cc (GATHER_SCATTER_OFFSET): New define. (vect_get_and_check_slp_defs): Use. * tree-vectorizer.h (GATHER_SCATTER_LEGACY_P): New define. (GATHER_SCATTER_IFN_P): Ditto. (GATHER_SCATTER_EMULATED_P): Ditto. * tree-vect-stmts.cc (vectorizable_store): Use. (vectorizable_load): Use. Diff: --- gcc/tree-vect-slp.cc | 12 +++- gcc/tree-vect-stmts.cc | 19 +-- gcc/tree-vectorizer.h | 8 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 7776b2f1d8e6..cfa841b3ce23 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -507,6 +507,8 @@ vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb) && (dtb == vect_external_def || dtb == vect_constant_def))); } +#define GATHER_SCATTER_OFFSET (-3) + static const int no_arg_map[] = { 0 }; static const int arg0_map[] = { 1, 0 }; static const int arg1_map[] = { 1, 1 }; @@ -516,10 +518,10 @@ static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 }; static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 }; static const int arg3_arg2_map[] = { 2, 3, 2 }; static const int op1_op0_map[] = { 2, 1, 0 }; -static const int off_map[] = { 1, -3 }; -static const int off_op0_map[] = { 2, -3, 0 }; -static const int off_arg2_arg3_map[] = { 3, -3, 2, 3 }; -static const int off_arg3_arg2_map[] = { 3, -3, 3, 2 }; +static const int off_map[] = { 1, GATHER_SCATTER_OFFSET }; +static const int off_op0_map[] = { 2, GATHER_SCATTER_OFFSET, 0 }; +static const int off_arg2_arg3_map[] = { 3, 
GATHER_SCATTER_OFFSET, 2, 3 }; +static const int off_arg3_arg2_map[] = { 3, GATHER_SCATTER_OFFSET, 3, 2 }; static const int mask_call_maps[6][7] = { { 1, 1, }, { 2, 1, 2, }, @@ -691,7 +693,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, { oprnd_info = (*oprnds_info)[i]; int opno = map ? map[i] : int (i); - if (opno == -3) + if (opno == GATHER_SCATTER_OFFSET) { gcc_assert (STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (!is_a (vinfo) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 7eb072b66dd2..5b8168fdea14 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2455,7 +2455,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, If that failed for some reason (e.g. because another pattern took priority), just handle cases in which the offset already has the right type. */ - else if (gs_info->ifn != IFN_LAST + else if (GATHER_SCATTER_IFN_P (*gs_info) && !is_gimple_call (stmt_info->stmt) && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), TREE_TYPE (gs_info->offset_vectype))) @@ -8104,7 +8104,8 @@ vectorizable_store (vec_info *vinfo, } else if (memory_access_type != VMAT_LOAD_STORE_LANES && (memory_access_type != VMAT_GATHER_SCATTER - || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype + || (GATHER_SCATTER_LEGACY_P (gs_info) + && !VECTOR_BOOLEAN_TYPE_P (mask_vectype { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -8112,8 +8113,7 @@ vectorizable_store (vec_info *vinfo, return false; } else if (memory_access_type == VMAT_GATHER_SCATTER - && gs_info.ifn == IFN_LAST - && !gs_info.decl) + && GATHER_SCATTER_EMULATED_P (gs_info)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -8838,7 +8838,7 @@ vectorizable_store (vec_info *vinfo, final_mask, vec_mask, gsi); } - if (gs_info.ifn != IFN_LAST) + if (GATHER_SCATTER_IFN_P (gs_info)) { if (costing_p) { @@ -8901,7 +8901,7 @@ vectorizable_store (vec_info *vinfo, 
vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); new_stmt = call; } - else if (gs_info.decl) + else if (GATHER_SCATTER_LEGACY_P (gs_info)) { /* The builtin decls path for scatter is legacy, x86 only. */ g
[gcc r16-2481] vect: Add is_gather_scatter argument to misalignment hook.
https://gcc.gnu.org/g:de75875d6779d7dc144aea2c5d26fb9aa2373d04 commit r16-2481-gde75875d6779d7dc144aea2c5d26fb9aa2373d04 Author: Robin Dapp Date: Wed Jul 2 10:02:16 2025 +0200 vect: Add is_gather_scatter argument to misalignment hook. This patch adds an is_gather_scatter argument to the support_vector_misalignment hook. No target other than riscv cares about alignment for gather/scatter, so all other targets simply return true when is_gather_scatter is set. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_builtin_support_vector_misalignment): Return true for gather/scatter. * config/arm/arm.cc (arm_builtin_support_vector_misalignment): Ditto. * config/epiphany/epiphany.cc (epiphany_support_vector_misalignment): Ditto. * config/gcn/gcn.cc (gcn_vectorize_support_vector_misalignment): Ditto. * config/loongarch/loongarch.cc (loongarch_builtin_support_vector_misalignment): Ditto. * config/riscv/riscv.cc (riscv_support_vector_misalignment): Add gather/scatter argument. * config/rs6000/rs6000.cc (rs6000_builtin_support_vector_misalignment): Return true for gather/scatter. * config/s390/s390.cc (s390_support_vector_misalignment): Ditto. * doc/tm.texi: Add argument. * target.def: Ditto. * targhooks.cc (default_builtin_support_vector_misalignment): Ditto. * targhooks.h (default_builtin_support_vector_misalignment): Ditto. * tree-vect-data-refs.cc (vect_supportable_dr_alignment): Ditto. 
Diff: --- gcc/config/aarch64/aarch64.cc | 12 +--- gcc/config/arm/arm.cc | 12 +--- gcc/config/epiphany/epiphany.cc | 8 ++-- gcc/config/gcn/gcn.cc | 6 +- gcc/config/loongarch/loongarch.cc | 8 ++-- gcc/config/riscv/riscv.cc | 29 +++-- gcc/config/rs6000/rs6000.cc | 11 ++- gcc/config/s390/s390.cc | 6 -- gcc/doc/tm.texi | 8 +--- gcc/target.def| 14 +- gcc/targhooks.cc | 2 ++ gcc/targhooks.h | 2 +- gcc/tree-vect-data-refs.cc| 2 +- 13 files changed, 90 insertions(+), 30 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 9e4a37bcaff0..2871b5f6e002 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -356,7 +356,8 @@ static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool); static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, int misalignment, -bool is_packed); +bool is_packed, +bool is_gather_scatter); static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, aarch64_addr_query_type); @@ -24414,10 +24415,14 @@ aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed) static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, int misalignment, -bool is_packed) +bool is_packed, +bool is_gather_scatter) { if (TARGET_SIMD && STRICT_ALIGNMENT) { + if (is_gather_scatter) + return true; + /* Return if movmisalign pattern is not supported for this mode. 
*/ if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) return false; @@ -24427,7 +24432,8 @@ aarch64_builtin_support_vector_misalignment (machine_mode mode, return false; } return default_builtin_support_vector_misalignment (mode, type, misalignment, - is_packed); + is_packed, + is_gather_scatter); } /* If VALS is a vector constant that can be loaded into a register diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index bde06f3fa866..29b45ae96bda 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -289,7 +289,8 @@ static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); static bool arm_builtin_support_vector_misalignment (machine_mode mode, const_tree type,
[gcc r16-2479] ifn: Add helper functions for gather/scatter.
https://gcc.gnu.org/g:a901860c34ff8966b33f5729d7872a53b3bfa722 commit r16-2479-ga901860c34ff8966b33f5729d7872a53b3bfa722 Author: Robin Dapp Date: Wed Jul 2 10:04:58 2025 +0200 ifn: Add helper functions for gather/scatter. This patch adds access helpers for the gather/scatter offset and scale parameters. gcc/ChangeLog: * internal-fn.cc (expand_scatter_store_optab_fn): Use new function. (expand_gather_load_optab_fn): Ditto. (internal_fn_offset_index): Ditto. (internal_fn_scale_index): Ditto. * internal-fn.h (internal_fn_offset_index): New function. (internal_fn_scale_index): Ditto. * tree-vect-data-refs.cc (vect_describe_gather_scatter_call): Use new function. Diff: --- gcc/internal-fn.cc | 57 ++ gcc/internal-fn.h | 2 ++ gcc/tree-vect-data-refs.cc | 6 +++-- 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 114f5a9da18d..1411f4497892 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -3652,8 +3652,8 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab) internal_fn ifn = gimple_call_internal_fn (stmt); int rhs_index = internal_fn_stored_value_index (ifn); tree base = gimple_call_arg (stmt, 0); - tree offset = gimple_call_arg (stmt, 1); - tree scale = gimple_call_arg (stmt, 2); + tree offset = gimple_call_arg (stmt, internal_fn_offset_index (ifn)); + tree scale = gimple_call_arg (stmt, internal_fn_scale_index (ifn)); tree rhs = gimple_call_arg (stmt, rhs_index); rtx base_rtx = expand_normal (base); @@ -3678,12 +3678,12 @@ expand_scatter_store_optab_fn (internal_fn, gcall *stmt, direct_optab optab) /* Expand {MASK_,}GATHER_LOAD call CALL using optab OPTAB. 
*/ static void -expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab) +expand_gather_load_optab_fn (internal_fn ifn, gcall *stmt, direct_optab optab) { tree lhs = gimple_call_lhs (stmt); tree base = gimple_call_arg (stmt, 0); - tree offset = gimple_call_arg (stmt, 1); - tree scale = gimple_call_arg (stmt, 2); + tree offset = gimple_call_arg (stmt, internal_fn_offset_index (ifn)); + tree scale = gimple_call_arg (stmt, internal_fn_scale_index (ifn)); rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); rtx base_rtx = expand_normal (base); @@ -5152,6 +5152,53 @@ internal_fn_stored_value_index (internal_fn fn) } } +/* If FN is a gather/scatter return the index of its offset argument, + otherwise return -1. */ + +int +internal_fn_offset_index (internal_fn fn) +{ + if (!internal_gather_scatter_fn_p (fn)) +return -1; + + switch (fn) +{ +case IFN_GATHER_LOAD: +case IFN_MASK_GATHER_LOAD: +case IFN_MASK_LEN_GATHER_LOAD: +case IFN_SCATTER_STORE: +case IFN_MASK_SCATTER_STORE: +case IFN_MASK_LEN_SCATTER_STORE: + return 1; + +default: + return -1; +} +} + +/* If FN is a gather/scatter return the index of its scale argument, + otherwise return -1. */ + +int +internal_fn_scale_index (internal_fn fn) +{ + if (!internal_gather_scatter_fn_p (fn)) +return -1; + + switch (fn) +{ +case IFN_GATHER_LOAD: +case IFN_MASK_GATHER_LOAD: +case IFN_MASK_LEN_GATHER_LOAD: +case IFN_SCATTER_STORE: +case IFN_MASK_SCATTER_STORE: +case IFN_MASK_LEN_SCATTER_STORE: + return 2; + +default: + return -1; +} +} /* Store all supported else values for the optab referred to by ICODE in ELSE_VALS. 
The index of the else operand must be specified in diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 02731ea03aeb..825381660bb1 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -240,6 +240,8 @@ extern int internal_fn_mask_index (internal_fn); extern int internal_fn_len_index (internal_fn); extern int internal_fn_else_index (internal_fn); extern int internal_fn_stored_value_index (internal_fn); +extern int internal_fn_offset_index (internal_fn fn); +extern int internal_fn_scale_index (internal_fn fn); extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree, tree, tree, int, vec * = nullptr); diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 5f672132a8ac..32f3f0e1fa9e 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -4539,10 +4539,12 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info, info->ifn = gimple_call_internal_fn (call); info->decl = NULL_TREE; info->base = gimple_call_arg (call, 0); - info->offset = gimple_call_arg (call, 1); + info->offset = gimple_call_arg + (call, internal_fn_offset_index (info->ifn)); info->offset_dt = vect_unknown_def_type; info->offset_vectype = NUL
[gcc r16-2483] riscv: testsuite: Fix misalignment check.
https://gcc.gnu.org/g:45665da440d07e5f559a7638e40e40099acbe78a commit r16-2483-g45665da440d07e5f559a7638e40e40099acbe78a Author: Robin Dapp Date: Wed Jul 2 10:28:57 2025 +0200 riscv: testsuite: Fix misalignment check. This fixes a thinko in the misalignment check. If we want to check for vector misalignment support we need to load 16-bit elements, not 8-bit elements that will never be misaligned. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Fix misalignment check. Diff: --- gcc/testsuite/lib/target-supports.exp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 018a6d3a764d..e375b1ec02b8 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2440,7 +2440,7 @@ proc check_effective_target_riscv_v_misalign_ok { } { = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; asm ("vsetivli zero,7,e8,m1,ta,ma"); asm ("addi a7,%0,1" : : "r" (a) : "a7" ); - asm ("vle8.v v8,0(a7)" : : : "v8"); + asm ("vle16.v v8,0(a7)" : : : "v8"); return 0; } } "-march=${gcc_march}"] } { return 1 }
[gcc r16-2515] RISC-V: Prepare dynamic LMUL heuristic for SLP.
https://gcc.gnu.org/g:529ae1484a992aba184509eebb71d6595768c887 commit r16-2515-g529ae1484a992aba184509eebb71d6595768c887 Author: Robin Dapp Date: Mon Jul 21 16:00:51 2025 +0200 RISC-V: Prepare dynamic LMUL heuristic for SLP. This patch prepares the dynamic LMUL vector costing to use the coming SLP_TREE_TYPE instead of the (to-be-removed) STMT_VINFO_TYPE. Even though the whole approach should be reviewed and adjusted at some point, the patch chooses the path of least resistance and uses a hash map for the stmt_info -> slp node relationship. A node is mapped to the accompanying stmt_info during add_stmt_cost. In finish_cost we go through all statements as before, and obtain the corresponding slp nodes as well as their types. This allows us to operate largely as before. We don't yet switch over from STMT_VINFO_TYPE to SLP_TREE_TYPE, though; this patch only takes care of the necessary refactoring upfront. Regtested on rv64gcv_zvl512b with -mrvv-max-lmul=dynamic. There are a few regressions but nothing worse than what we already have. I'd rather accept these now and take them as an incentive to work on the heuristic later than block the SLP work until they are fixed. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (get_live_range): Move compute_local_program_points to cost class. (variable_vectorized_p): Add slp node parameter. (need_additional_vector_vars_p): Move from here... (costs::need_additional_vector_vars_p): ... to here and add slp parameter. (compute_estimated_lmul): Move update_local_live_ranges to cost class. (has_unexpected_spills_p): Move from here... (costs::has_unexpected_spills_p): ... to here. (costs::record_lmul_spills): New function. (costs::add_stmt_cost): Add stmt_info, slp mapping. (costs::finish_cost): Analyze loop. * config/riscv/riscv-vector-costs.h: Move declarations to class. 
Diff: --- gcc/config/riscv/riscv-vector-costs.cc | 71 ++ gcc/config/riscv/riscv-vector-costs.h | 16 2 files changed, 62 insertions(+), 25 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 4d8170de9b2c..df924fafd8e5 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -178,8 +178,8 @@ get_live_range (hash_map *live_ranges, tree arg) STMT 5 (be vectorized) -- point 2 ... */ -static void -compute_local_program_points ( +void +costs::compute_local_program_points ( vec_info *vinfo, hash_map> &program_points_per_bb) { @@ -274,14 +274,14 @@ loop_invariant_op_p (class loop *loop, /* Return true if the variable should be counted into liveness. */ static bool -variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, tree var, - bool lhs_p) +variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, + slp_tree node ATTRIBUTE_UNUSED, tree var, bool lhs_p) { if (!var) return false; gimple *stmt = STMT_VINFO_STMT (stmt_info); - enum stmt_vec_info_type type -= STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info)); + stmt_info = vect_stmt_to_vectorize (stmt_info); + enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info); if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)) { if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE @@ -357,8 +357,8 @@ variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, tree var, The live range of SSA 1 is [1, 3] in bb 2. The live range of SSA 2 is [0, 4] in bb 3. 
*/ -static machine_mode -compute_local_live_ranges ( +machine_mode +costs::compute_local_live_ranges ( loop_vec_info loop_vinfo, const hash_map> &program_points_per_bb, hash_map> &live_ranges_per_bb) @@ -388,8 +388,11 @@ compute_local_live_ranges ( unsigned int point = program_point.point; gimple *stmt = program_point.stmt; tree lhs = gimple_get_lhs (stmt); - if (variable_vectorized_p (loop, program_point.stmt_info, lhs, -true)) + slp_tree *node = vinfo_slp_map.get (program_point.stmt_info); + if (!node) + continue; + if (variable_vectorized_p (loop, program_point.stmt_info, +*node, lhs, true)) { biggest_mode = get_biggest_mode (biggest_mode, TYPE_MODE (TREE_TYPE (lhs))); @@ -406,8 +409,8 @@ compute_local_live_ranges ( for (i = 0; i < gimple_num_args (stmt); i++) { tree var = gimple_arg (stmt, i); - if (variable_vectorized_p