https://gcc.gnu.org/g:40ad10f708b19d3e88948ac820fbfb9f3c3689ae
commit r15-6906-g40ad10f708b19d3e88948ac820fbfb9f3c3689ae Author: Kito Cheng <kito.ch...@sifive.com> Date: Mon Dec 23 23:23:44 2024 +0800 RISC-V: Fix code gen for reduction with length 0 [PR118182] `.MASK_LEN_FOLD_LEFT_PLUS` (or `mask_len_fold_left_plus_m`) expects the return value to be the start value even if the length is 0. However, the current code gen in the RISC-V backend does not meet that semantic: it results in a random garbage value if the length is 0. Take the current code gen for MASK_LEN_FOLD_LEFT_PLUS with f64 as an example: # _148 = .MASK_LEN_FOLD_LEFT_PLUS (stmp__148.33_134, vect__70.32_138, { -1, ... }, loop_len_161, 0); vsetvli zero,a5,e64,m1,ta,ma vfmv.s.f v2,fa5 # insn 1 vfredosum.vs v1,v1,v2 # insn 2 vfmv.f.s fa5,v1 # insn 3 insn 1: - vfmv.s.f won't do anything if VL=0, which means v2 will contain a garbage value. insn 2: - vfredosum.vs won't do anything if VL=0, and keeps vd unchanged even with the tail-agnostic (TA) policy. (The V spec says: `If vl=0, no operation is performed and the destination register is not updated.`) insn 3: - vfmv.f.s will move the value from v1 even if VL=0, so this is safe. So how do we fix that? We need two fixes: 1. insn 1: it must always execute with VL=1, so that we can guarantee it always works as expected. 2. insn 2: add new patterns that force `vd` to use the same register as `vs1` (start value) for all reduction patterns, so that we can guarantee vd[0] contains the start value when vl=0. For 1, it's just a simple change to riscv_vector::expand_reduction, but for 2, we have to add _VL0_SAFE variants of the reductions to force `vd` to use the same register as `vs1` (start value). Changes since V3: - Rename _AV to _VL0_SAFE for readability. - Use non-VL0_SAFE version if VL is const or VLMAX. - Only force VL=1 for vfmv.s.f when VL is non-const and non-VLMAX. - Two more testcases. gcc/ChangeLog: PR target/118182 * config/riscv/autovec-opt.md (*widen_reduc_plus_scal_<mode>): Adjust argument for expand_reduction. (*widen_reduc_plus_scal_<mode>): Ditto. (*fold_left_widen_plus_<mode>): Ditto. 
(*mask_len_fold_left_widen_plus_<mode>): Ditto. (*cond_widen_reduc_plus_scal_<mode>): Ditto. (*cond_len_widen_reduc_plus_scal_<mode>): Ditto. (*cond_widen_reduc_plus_scal_<mode>): Ditto. * config/riscv/autovec.md (reduc_plus_scal_<mode>): Adjust argument for expand_reduction. (reduc_smax_scal_<mode>): Ditto. (reduc_umax_scal_<mode>): Ditto. (reduc_smin_scal_<mode>): Ditto. (reduc_umin_scal_<mode>): Ditto. (reduc_and_scal_<mode>): Ditto. (reduc_ior_scal_<mode>): Ditto. (reduc_xor_scal_<mode>): Ditto. (reduc_plus_scal_<mode>): Ditto. (reduc_smax_scal_<mode>): Ditto. (reduc_smin_scal_<mode>): Ditto. (reduc_fmax_scal_<mode>): Ditto. (reduc_fmin_scal_<mode>): Ditto. (fold_left_plus_<mode>): Ditto. (mask_len_fold_left_plus_<mode>): Ditto. * config/riscv/riscv-v.cc (expand_reduction): Add one more argument for reduction code for vl0-safe. * config/riscv/riscv-protos.h (expand_reduction): Ditto. * config/riscv/vector-iterators.md (unspec): Add _VL0_SAFE variant of reduction. (ANY_REDUC_VL0_SAFE): New. (ANY_WREDUC_VL0_SAFE): Ditto. (ANY_FREDUC_VL0_SAFE): Ditto. (ANY_FREDUC_SUM_VL0_SAFE): Ditto. (ANY_FWREDUC_SUM_VL0_SAFE): Ditto. (reduc_op): Add _VL0_SAFE variant of reduction. (order) Ditto. * config/riscv/vector.md (@pred_<reduc_op><mode>): New. gcc/testsuite/ChangeLog: PR target/118182 * gfortran.target/riscv/rvv/pr118182.f: New. * gcc.target/riscv/rvv/autovec/pr118182-1.c: New. * gcc.target/riscv/rvv/autovec/pr118182-2.c: New. 
Diff: --- gcc/config/riscv/autovec-opt.md | 10 +- gcc/config/riscv/autovec.md | 51 ++++++-- gcc/config/riscv/riscv-protos.h | 2 +- gcc/config/riscv/riscv-v.cc | 25 +++- gcc/config/riscv/vector-iterators.md | 59 +++++++++ gcc/config/riscv/vector.md | 133 ++++++++++++++++++++- .../gcc.target/riscv/rvv/autovec/pr118182-1.c | 28 +++++ .../gcc.target/riscv/rvv/autovec/pr118182-2.c | 27 +++++ gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f | 63 ++++++++++ 9 files changed, 375 insertions(+), 23 deletions(-) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index d7190725adb9..53431863441c 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -810,7 +810,8 @@ "&& 1" [(const_int 0)] { - riscv_vector::expand_reduction (<WREDUC_UNSPEC>, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (<WREDUC_UNSPEC>, <WREDUC_UNSPEC_VL0_SAFE>, + riscv_vector::REDUCE_OP, operands, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; @@ -830,6 +831,7 @@ [(const_int 0)] { riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, + UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE, riscv_vector::REDUCE_OP_FRM_DYN, operands, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); @@ -851,6 +853,7 @@ [(const_int 0)] { riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, + UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE, riscv_vector::REDUCE_OP_FRM_DYN, operands, operands[2]); DONE; @@ -879,6 +882,7 @@ { rtx ops[] = {operands[0], operands[2], operands[3], operands[4]}; riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, + UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE, riscv_vector::REDUCE_OP_M_FRM_DYN, ops, operands[1]); } @@ -1227,6 +1231,7 @@ rtx ops[] = {operands[0], operands[2], operands[1], gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; riscv_vector::expand_reduction (<WREDUC_UNSPEC>, + <WREDUC_UNSPEC_VL0_SAFE>, riscv_vector::REDUCE_OP_M, ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; @@ -1282,6 +1287,7 @@ { rtx ops[] = {operands[0], operands[3], 
operands[1], operands[2]}; riscv_vector::expand_reduction (<WREDUC_UNSPEC>, + <WREDUC_UNSPEC_VL0_SAFE>, riscv_vector::REDUCE_OP_M, ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; @@ -1318,6 +1324,7 @@ rtx ops[] = {operands[0], operands[2], operands[1], gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, + UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE, riscv_vector::REDUCE_OP_M_FRM_DYN, ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; @@ -1373,6 +1380,7 @@ { rtx ops[] = {operands[0], operands[3], operands[1], operands[2]}; riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, + UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE, riscv_vector::REDUCE_OP_M_FRM_DYN, ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 8426f12757f3..92e6942b5233 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2185,7 +2185,9 @@ "&& 1" [(const_int 0)] { - riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, + UNSPEC_REDUC_SUM_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, CONST0_RTX (<VEL>mode)); DONE; } @@ -2198,7 +2200,9 @@ { int prec = GET_MODE_PRECISION (<VEL>mode); rtx min = immed_wide_int_const (wi::min_value (prec, SIGNED), <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, + UNSPEC_REDUC_MAX_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, min); DONE; }) @@ -2208,7 +2212,9 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_reduction (UNSPEC_REDUC_MAXU, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MAXU, + UNSPEC_REDUC_MAXU_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, CONST0_RTX (<VEL>mode)); DONE; }) @@ -2220,7 +2226,9 @@ { int prec = GET_MODE_PRECISION (<VEL>mode); rtx max = immed_wide_int_const 
(wi::max_value (prec, SIGNED), <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, + UNSPEC_REDUC_MIN_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, max); DONE; }) @@ -2232,7 +2240,9 @@ { int prec = GET_MODE_PRECISION (<VEL>mode); rtx max = immed_wide_int_const (wi::max_value (prec, UNSIGNED), <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MINU, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MINU, + UNSPEC_REDUC_MINU_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, max); DONE; }) @@ -2242,7 +2252,9 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_reduction (UNSPEC_REDUC_AND, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_AND, + UNSPEC_REDUC_AND_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, CONSTM1_RTX (<VEL>mode)); DONE; }) @@ -2252,7 +2264,9 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_reduction (UNSPEC_REDUC_OR, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_OR, + UNSPEC_REDUC_OR_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, CONST0_RTX (<VEL>mode)); DONE; }) @@ -2262,7 +2276,9 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_reduction (UNSPEC_REDUC_XOR, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_XOR, + UNSPEC_REDUC_XOR_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, CONST0_RTX (<VEL>mode)); DONE; }) @@ -2287,6 +2303,7 @@ [(const_int 0)] { riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_UNORDERED, + UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE, riscv_vector::REDUCE_OP_FRM_DYN, operands, CONST0_RTX (<VEL>mode)); DONE; @@ -2301,7 +2318,9 @@ REAL_VALUE_TYPE rv; real_inf (&rv, true); rtx f = const_double_from_real_value (rv, <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction 
(UNSPEC_REDUC_MAX, + UNSPEC_REDUC_MAX_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, f); DONE; }) @@ -2314,7 +2333,9 @@ REAL_VALUE_TYPE rv; real_inf (&rv, false); rtx f = const_double_from_real_value (rv, <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, + UNSPEC_REDUC_MIN_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, f); DONE; }) @@ -2327,7 +2348,9 @@ REAL_VALUE_TYPE rv; real_inf (&rv, true); rtx f = const_double_from_real_value (rv, <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, + UNSPEC_REDUC_MAX_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, f); DONE; }) @@ -2340,7 +2363,9 @@ REAL_VALUE_TYPE rv; real_inf (&rv, false); rtx f = const_double_from_real_value (rv, <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, + UNSPEC_REDUC_MIN_VL0_SAFE, + riscv_vector::REDUCE_OP, operands, f); DONE; }) @@ -2366,6 +2391,7 @@ { rtx ops[] = {operands[0], operands[2]}; riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, + UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE, riscv_vector::REDUCE_OP_FRM_DYN, ops, operands[1]); DONE; @@ -2393,6 +2419,7 @@ { rtx ops[] = {operands[0], operands[2], operands[3], operands[4]}; riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, + UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE, riscv_vector::REDUCE_OP_M_FRM_DYN, ops, operands[1]); } diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index dd3b36d47a69..d9421c907dab 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -639,7 +639,7 @@ void expand_vec_cmp (rtx, rtx_code, rtx, rtx, rtx = nullptr, rtx = nullptr); bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); void expand_cond_len_unop (unsigned, rtx *); void expand_cond_len_binop (unsigned, rtx *); -void 
expand_reduction (unsigned, unsigned, rtx *, rtx); +void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx); void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode); void expand_vec_floor (rtx, rtx, machine_mode, machine_mode); void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 3735a5ba6594..e1172e9c7d2b 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4512,30 +4512,47 @@ expand_cond_ternop (unsigned icode, rtx *ops) Case 2: ops = {scalar_dest, vector_src, mask, vl} */ void -expand_reduction (unsigned unspec, unsigned insn_flags, rtx *ops, rtx init) +expand_reduction (unsigned unspec, unsigned unspec_for_vl0_safe, + unsigned insn_flags, rtx *ops, rtx init) { rtx scalar_dest = ops[0]; rtx vector_src = ops[1]; machine_mode vmode = GET_MODE (vector_src); machine_mode vel_mode = GET_MODE (scalar_dest); machine_mode m1_mode = get_m1_mode (vel_mode).require (); + rtx vl_op = NULL_RTX; + bool need_vl0_safe = false; + if (need_mask_operand_p (insn_flags)) + { + vl_op = ops[3]; + need_vl0_safe = !CONST_INT_P (vl_op) && !CONST_POLY_INT_P (vl_op); + } rtx m1_tmp = gen_reg_rtx (m1_mode); rtx scalar_move_ops[] = {m1_tmp, init}; insn_code icode = code_for_pred_broadcast (m1_mode); if (need_mask_operand_p (insn_flags)) - emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, ops[3]); + { + if (need_vl0_safe) + emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, const1_rtx); + else + emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, vl_op); + } else emit_vlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops); rtx m1_tmp2 = gen_reg_rtx (m1_mode); rtx reduc_ops[] = {m1_tmp2, vector_src, m1_tmp}; - icode = code_for_pred (unspec, vmode); + + if (need_vl0_safe) + icode = code_for_pred (unspec_for_vl0_safe, vmode); + else + icode = code_for_pred (unspec, vmode); if (need_mask_operand_p (insn_flags)) { rtx mask_len_reduc_ops[] = 
{m1_tmp2, ops[2], vector_src, m1_tmp}; - emit_nonvlmax_insn (icode, insn_flags, mask_len_reduc_ops, ops[3]); + emit_nonvlmax_insn (icode, insn_flags, mask_len_reduc_ops, vl_op); } else emit_vlmax_insn (icode, insn_flags, reduc_ops); diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index dfab0bbf014c..3c8da66cebf4 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -88,8 +88,11 @@ ;; Integer and Float Reduction UNSPEC_REDUC UNSPEC_REDUC_SUM + UNSPEC_REDUC_SUM_VL0_SAFE UNSPEC_REDUC_SUM_ORDERED UNSPEC_REDUC_SUM_UNORDERED + UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE + UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE UNSPEC_REDUC_MAXU UNSPEC_REDUC_MAX UNSPEC_REDUC_MINU @@ -97,11 +100,22 @@ UNSPEC_REDUC_AND UNSPEC_REDUC_OR UNSPEC_REDUC_XOR + UNSPEC_REDUC_MAXU_VL0_SAFE + UNSPEC_REDUC_MAX_VL0_SAFE + UNSPEC_REDUC_MINU_VL0_SAFE + UNSPEC_REDUC_MIN_VL0_SAFE + UNSPEC_REDUC_AND_VL0_SAFE + UNSPEC_REDUC_OR_VL0_SAFE + UNSPEC_REDUC_XOR_VL0_SAFE UNSPEC_WREDUC_SUM UNSPEC_WREDUC_SUMU + UNSPEC_WREDUC_SUM_VL0_SAFE + UNSPEC_WREDUC_SUMU_VL0_SAFE UNSPEC_WREDUC_SUM_ORDERED UNSPEC_WREDUC_SUM_UNORDERED + UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE + UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE UNSPEC_SELECT_MASK UNSPEC_SF_VFNRCLIP @@ -1665,32 +1679,75 @@ UNSPEC_REDUC_MIN UNSPEC_REDUC_AND UNSPEC_REDUC_OR UNSPEC_REDUC_XOR ]) +(define_int_iterator ANY_REDUC_VL0_SAFE [ + UNSPEC_REDUC_SUM_VL0_SAFE UNSPEC_REDUC_MAXU_VL0_SAFE UNSPEC_REDUC_MAX_VL0_SAFE UNSPEC_REDUC_MINU_VL0_SAFE + UNSPEC_REDUC_MIN_VL0_SAFE UNSPEC_REDUC_AND_VL0_SAFE UNSPEC_REDUC_OR_VL0_SAFE UNSPEC_REDUC_XOR_VL0_SAFE +]) + (define_int_iterator ANY_WREDUC [ UNSPEC_WREDUC_SUM UNSPEC_WREDUC_SUMU ]) +(define_int_iterator ANY_WREDUC_VL0_SAFE [ + UNSPEC_WREDUC_SUM_VL0_SAFE UNSPEC_WREDUC_SUMU_VL0_SAFE +]) + (define_int_iterator ANY_FREDUC [ UNSPEC_REDUC_MAX UNSPEC_REDUC_MIN ]) +(define_int_iterator ANY_FREDUC_VL0_SAFE [ + UNSPEC_REDUC_MAX_VL0_SAFE UNSPEC_REDUC_MIN_VL0_SAFE +]) + (define_int_iterator 
ANY_FREDUC_SUM [ UNSPEC_REDUC_SUM_ORDERED UNSPEC_REDUC_SUM_UNORDERED ]) +(define_int_iterator ANY_FREDUC_SUM_VL0_SAFE [ + UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE +]) + (define_int_iterator ANY_FWREDUC_SUM [ UNSPEC_WREDUC_SUM_ORDERED UNSPEC_WREDUC_SUM_UNORDERED ]) +(define_int_iterator ANY_FWREDUC_SUM_VL0_SAFE [ + UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE +]) + +(define_int_attr reduc_op_pat_name [ + (UNSPEC_REDUC_SUM "redsum") + (UNSPEC_REDUC_SUM_VL0_SAFE "redsum_vl0s") + (UNSPEC_REDUC_SUM_ORDERED "redosum") (UNSPEC_REDUC_SUM_UNORDERED "redusum") + (UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "redosum_vl0s") (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "redusum_vl0s") + (UNSPEC_REDUC_MAXU "redmaxu") (UNSPEC_REDUC_MAX "redmax") (UNSPEC_REDUC_MINU "redminu") (UNSPEC_REDUC_MIN "redmin") + (UNSPEC_REDUC_MAXU_VL0_SAFE "redmaxu_vl0s") (UNSPEC_REDUC_MAX_VL0_SAFE "redmax_vl0s") (UNSPEC_REDUC_MINU_VL0_SAFE "redminu_vl0s") (UNSPEC_REDUC_MIN_VL0_SAFE "redmin_vl0s") + (UNSPEC_REDUC_AND "redand") (UNSPEC_REDUC_OR "redor") (UNSPEC_REDUC_XOR "redxor") + (UNSPEC_REDUC_AND_VL0_SAFE "redand_vl0s") (UNSPEC_REDUC_OR_VL0_SAFE "redor_vl0s") (UNSPEC_REDUC_XOR_VL0_SAFE "redxor_vl0s") + (UNSPEC_WREDUC_SUM "wredsum") (UNSPEC_WREDUC_SUMU "wredsumu") + (UNSPEC_WREDUC_SUM_VL0_SAFE "wredsum_vl0s") (UNSPEC_WREDUC_SUMU_VL0_SAFE "wredsumu_vl0s") + (UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED "wredusum") + (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "wredosum_vl0s") (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "wredusum_vl0s") +]) + (define_int_attr reduc_op [ (UNSPEC_REDUC_SUM "redsum") + (UNSPEC_REDUC_SUM_VL0_SAFE "redsum") (UNSPEC_REDUC_SUM_ORDERED "redosum") (UNSPEC_REDUC_SUM_UNORDERED "redusum") + (UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "redosum") (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "redusum") (UNSPEC_REDUC_MAXU "redmaxu") (UNSPEC_REDUC_MAX "redmax") (UNSPEC_REDUC_MINU "redminu") (UNSPEC_REDUC_MIN "redmin") + (UNSPEC_REDUC_MAXU_VL0_SAFE "redmaxu") 
(UNSPEC_REDUC_MAX_VL0_SAFE "redmax") (UNSPEC_REDUC_MINU_VL0_SAFE "redminu") (UNSPEC_REDUC_MIN_VL0_SAFE "redmin") (UNSPEC_REDUC_AND "redand") (UNSPEC_REDUC_OR "redor") (UNSPEC_REDUC_XOR "redxor") + (UNSPEC_REDUC_AND_VL0_SAFE "redand") (UNSPEC_REDUC_OR_VL0_SAFE "redor") (UNSPEC_REDUC_XOR_VL0_SAFE "redxor") (UNSPEC_WREDUC_SUM "wredsum") (UNSPEC_WREDUC_SUMU "wredsumu") + (UNSPEC_WREDUC_SUM_VL0_SAFE "wredsum") (UNSPEC_WREDUC_SUMU_VL0_SAFE "wredsumu") (UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED "wredusum") + (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "wredusum") ]) (define_code_attr WREDUC_UNSPEC [(zero_extend "UNSPEC_WREDUC_SUMU") (sign_extend "UNSPEC_WREDUC_SUM")]) +(define_code_attr WREDUC_UNSPEC_VL0_SAFE [(zero_extend "UNSPEC_WREDUC_SUMU_VL0_SAFE") (sign_extend "UNSPEC_WREDUC_SUM_VL0_SAFE")]) (define_mode_attr VINDEX [ (RVVM8QI "RVVM8QI") (RVVM4QI "RVVM4QI") (RVVM2QI "RVVM2QI") (RVVM1QI "RVVM1QI") @@ -3930,6 +3987,8 @@ (UNSPEC_ORDERED "o") (UNSPEC_UNORDERED "u") (UNSPEC_REDUC_SUM_ORDERED "o") (UNSPEC_REDUC_SUM_UNORDERED "u") (UNSPEC_WREDUC_SUM_ORDERED "o") (UNSPEC_WREDUC_SUM_UNORDERED "u") + (UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "o") (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "u") + (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "o") (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "u") ]) (define_int_attr v_su [(UNSPEC_VMULHS "") (UNSPEC_VMULHU "u") (UNSPEC_VMULHSU "su") diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index e78d10906963..4658db2653fe 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -7745,9 +7745,14 @@ ;; - 14.3 Vector Single-Width Floating-Point Reduction Instructions ;; - 14.4 Vector Widening Floating-Point Reduction Instructions ;; ------------------------------------------------------------------------------- +;; +;; NOTE for VL0 safe variantreduction: +;; The VL0 safe variantis used by the auto vectorizer to generate vectorized code +;; only, because the auto 
vectorizer expect reduction should propgat the start +;; value to dest even VL=0, the only way is force vd=vs1 by constraint. ;; Integer Reduction (vred(sum|maxu|max|minu|min|and|or|xor).vs) -(define_insn "@pred_<reduc_op><mode>" +(define_insn "@pred_<reduc_op_pat_name><mode>" [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr, vr") (unspec:<V_LMUL1> [(unspec:<VM> @@ -7767,8 +7772,30 @@ [(set_attr "type" "vired") (set_attr "mode" "<MODE>")]) +;; Integer Reduction (vred(sum|maxu|max|minu|min|and|or|xor).vs) +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for detail) +(define_insn "@pred_<reduc_op_pat_name><mode>" + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_LMUL1> [ + (match_operand:V_VLSI 3 "register_operand" " vr") + (match_operand:<V_LMUL1> 4 "register_operand" " 0") + ] ANY_REDUC_VL0_SAFE) + (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "v<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "vired") + (set_attr "mode" "<MODE>")]) + ;; Integer Widen Reduction Sum (vwredsum[u].vs) -(define_insn "@pred_<reduc_op><mode>" +(define_insn "@pred_<reduc_op_pat_name><mode>" [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr, vr") (unspec:<V_EXT_LMUL1> [(unspec:<VM> @@ -7788,8 +7815,30 @@ [(set_attr "type" "viwred") (set_attr "mode" "<MODE>")]) +;; Integer Widen Reduction Sum (vwredsum[u].vs) +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for detail) +(define_insn "@pred_<reduc_op_pat_name><mode>" + [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_EXT_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" 
"vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_EXT_LMUL1> [ + (match_operand:VI_QHS 3 "register_operand" " vr") + (match_operand:<V_EXT_LMUL1> 4 "register_operand" " 0") + ] ANY_WREDUC_VL0_SAFE) + (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "v<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "viwred") + (set_attr "mode" "<MODE>")]) + ;; Float Reduction (vfred(max|min).vs) -(define_insn "@pred_<reduc_op><mode>" +(define_insn "@pred_<reduc_op_pat_name><mode>" [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr, vr") (unspec:<V_LMUL1> [(unspec:<VM> @@ -7809,8 +7858,30 @@ [(set_attr "type" "vfredu") (set_attr "mode" "<MODE>")]) +;; Float Reduction (vfred(max|min).vs) +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for detail) +(define_insn "@pred_<reduc_op_pat_name><mode>" + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_LMUL1> [ + (match_operand:V_VLSF 3 "register_operand" " vr") + (match_operand:<V_LMUL1> 4 "register_operand" " 0") + ] ANY_FREDUC_VL0_SAFE) + (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "vf<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfredu") + (set_attr "mode" "<MODE>")]) + ;; Float Reduction Sum (vfred[ou]sum.vs) -(define_insn "@pred_<reduc_op><mode>" +(define_insn "@pred_<reduc_op_pat_name><mode>" [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr,vr") (unspec:<V_LMUL1> [(unspec:<VM> @@ -7834,8 +7905,34 @@ (set (attr 
"frm_mode") (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) +;; Float Reduction Sum (vfred[ou]sum.vs) +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for detail) +(define_insn "@pred_<reduc_op_pat_name><mode>" + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_LMUL1> [ + (match_operand:V_VLSF 3 "register_operand" " vr") + (match_operand:<V_LMUL1> 4 "register_operand" " 0") + ] ANY_FREDUC_SUM_VL0_SAFE) + (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "vf<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfred<order>") + (set_attr "mode" "<MODE>") + (set (attr "frm_mode") + (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) + ;; Float Widen Reduction Sum (vfwred[ou]sum.vs) -(define_insn "@pred_<reduc_op><mode>" +(define_insn "@pred_<reduc_op_pat_name><mode>" [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr, vr") (unspec:<V_EXT_LMUL1> [(unspec:<VM> @@ -7859,6 +7956,32 @@ (set (attr "frm_mode") (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) +;; Float Widen Reduction Sum (vfwred[ou]sum.vs) +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for detail) +(define_insn "@pred_<reduc_op_pat_name><mode>" + [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_EXT_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + 
(reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_EXT_LMUL1> [ + (match_operand:VF_HS 3 "register_operand" " vr") + (match_operand:<V_EXT_LMUL1> 4 "register_operand" " 0") + ] ANY_FWREDUC_SUM_VL0_SAFE) + (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "vf<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfwred<order>") + (set_attr "mode" "<MODE>") + (set (attr "frm_mode") + (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) + ;; ------------------------------------------------------------------------------- ;; ---- Predicated permutation operations ;; ------------------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-1.c new file mode 100644 index 000000000000..1ab17245ba90 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -fno-vect-cost-model -O3 -mabi=lp64d" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ + +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** f1: +** ... +** vsetivli zero,1,.* +** ... +** vfmv.s.f .* +** ... +** vsetvli zero,.* +** ... +** vfredosum.vs .* +** ... +** vfmv.f.s .* +** ... 
+*/ + +float f1(float *arr, int n) +{ + float sum = 0; + for (int i = 0; i < n; i++) + sum += arr[i]; + return sum; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-2.c new file mode 100644 index 000000000000..619d757a14fa --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-2.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -fno-vect-cost-model -O3 -mabi=lp64d -ffast-math" } */ +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ + +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** f1: +** ... +** vsetvli [ast][0-9]+,zero,.* +** ... +** vmv.s.x .* +** ... +** vfredusum.vs .* +** ... +** vfmv.f.s .* +** ... +*/ + +float f1(float *arr, int n) +{ + float sum = 0; + for (int i = 0; i < n; i++) + sum += arr[i]; + return sum; +} +/* { dg-final { scan-assembler-not {\tvsetivli\tzero,1,.*} } } */ diff --git a/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f b/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f new file mode 100644 index 000000000000..7ecbfeb863b8 --- /dev/null +++ b/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f @@ -0,0 +1,63 @@ +! { dg-do run } +! { dg-options "-fno-vect-cost-model" } + + program dqnorm_calculator + implicit none + + ! Declare variables + integer, parameter :: nx = 33, ny = 33, nz =16 + real(8) :: dq(5, nx, ny, nz) + real(8) :: result, expected_result, tolerance + integer :: i, j, k, l + + ! Initialize the dq array with values calculated as k + j + i + 5 + do k = 1, nz + do j = 1, ny + do i = 1, nx + do l = 1, 5 + dq(l, i, j, k) = k + j + i + 5 + end do + end do + end do + end do + + ! Call the subroutine to calculate the norm + call redsum(dq, nx, ny, nz, result) + + ! 
Check the result + expected_result = 214213560.0d0 + tolerance = 0.0001d0 + if (abs(result - expected_result) > tolerance) then + print *, "Result is incorrect: ", result + call abort() + end if + end + + subroutine redsum(dq, nx, ny, nz, result) + implicit none + + ! Declare arguments and local variables + integer, intent(in) :: nx, ny, nz + real(8), intent(in) :: dq(5, nx, ny, nz) + real(8), intent(out) :: result + real(8) :: dqnorm + integer :: i, j, k, l + + ! Initialize dqnorm + dqnorm = 0.0d0 + + ! Compute the sum of squares of dq elements + do k = 1, nz + do j = 1, ny + do i = 1, nx + do l = 1, 5 + dqnorm = dqnorm + dq(l, i, j, k) * dq(l, i, j, k) + end do + end do + end do + end do + + result = dqnorm + + end subroutine redsum +