`.MASK_LEN_FOLD_LEFT_PLUS` (or `mask_len_fold_left_plus_m`) expects the return value to be the start value even if the length is 0.
However, the current code gen in the RISC-V backend does not meet that semantics: it produces a random garbage value if the length is 0. Take the current code gen for MASK_LEN_FOLD_LEFT_PLUS with f64 as an example: # _148 = .MASK_LEN_FOLD_LEFT_PLUS (stmp__148.33_134, vect__70.32_138, { -1, ... }, loop_len_161, 0); vsetvli zero,a5,e64,m1,ta,ma vfmv.s.f v2,fa5 # insn 1 vfredosum.vs v1,v1,v2 # insn 2 vfmv.f.s fa5,v1 # insn 3 insn 1: - vfmv.s.f won't do anything if VL=0, which means v2 will contain a garbage value. insn 2: - vfredosum.vs won't do anything if VL=0, and keeps vd unchanged even with TA. (The V spec says: `If vl=0, no operation is performed and the destination register is not updated.`) insn 3: - vfmv.f.s will move the value from v1 even if VL=0, so this is safe. So how do we fix that? We need two fixes: 1. insn 1: must always execute with VL=1, so that we can guarantee it will always work as expected. 2. insn 2: Add new patterns to force `vd` to use the same reg as `vs1` (start value) for all reduction patterns, so that we can guarantee vd[0] will contain the start value when vl=0. For 1, it's just a simple change to riscv_vector::expand_reduction, but for 2, we have to add _AV variant reductions to force `vd` to use the same reg as `vs1` (start value). gcc/ChangeLog: * config/riscv/autovec-opt.md (*widen_reduc_plus_scal_<mode>): Use _AV variant of reduction expansion. (*widen_reduc_plus_scal_<mode>): Ditto. (*fold_left_widen_plus_<mode>): Ditto. (*mask_len_fold_left_widen_plus_<mode>): Ditto. (*cond_widen_reduc_plus_scal_<mode>): Ditto. (*cond_len_widen_reduc_plus_scal_<mode>): Ditto. (*cond_widen_reduc_plus_scal_<mode>): Ditto. * config/riscv/autovec.md: Use _AV variant of reduction expansion. * config/riscv/riscv-v.cc (expand_reduction): Use _AV variant of reduction expansion, also always use VL=1 to set up the start value. * config/riscv/vector-iterators.md (unspec): Add _AV variant of reduction. (ANY_REDUC_AV): New. (ANY_WREDUC_AV): Ditto. (ANY_FREDUC_AV): Ditto. (ANY_FREDUC_SUM_AV): Ditto. (ANY_FWREDUC_SUM_AV): Ditto. 
(reduc_op): Add _AV variant of reduction. (order) Ditto. * config/riscv/vector.md (@pred_av_<reduc_op><mode>): New. gcc/testsuite/ChangeLog: * gfortran.target/riscv/rvv/pr118182.f: New. --- gcc/config/riscv/autovec-opt.md | 16 +-- gcc/config/riscv/autovec.md | 30 ++--- gcc/config/riscv/riscv-v.cc | 4 +- gcc/config/riscv/vector-iterators.md | 44 +++++++ gcc/config/riscv/vector.md | 123 ++++++++++++++++++ .../gfortran.target/riscv/rvv/pr118182.f | 63 +++++++++ 6 files changed, 255 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 4b33a145c17..45e3be4237b 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -810,7 +810,7 @@ "&& 1" [(const_int 0)] { - riscv_vector::expand_reduction (<WREDUC_UNSPEC>, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (<WREDUC_UNSPEC_AV>, riscv_vector::REDUCE_OP, operands, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; @@ -829,7 +829,7 @@ "&& 1" [(const_int 0)] { - riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED_AV, riscv_vector::REDUCE_OP_FRM_DYN, operands, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); @@ -850,7 +850,7 @@ "&& 1" [(const_int 0)] { - riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED_AV, riscv_vector::REDUCE_OP_FRM_DYN, operands, operands[2]); DONE; @@ -878,7 +878,7 @@ else { rtx ops[] = {operands[0], operands[2], operands[3], operands[4]}; - riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED_AV, riscv_vector::REDUCE_OP_M_FRM_DYN, ops, operands[1]); } @@ -1226,7 +1226,7 @@ { rtx ops[] = {operands[0], operands[2], operands[1], gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; - riscv_vector::expand_reduction (<WREDUC_UNSPEC>, + 
riscv_vector::expand_reduction (<WREDUC_UNSPEC_AV>, riscv_vector::REDUCE_OP_M, ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; @@ -1281,7 +1281,7 @@ [(const_int 0)] { rtx ops[] = {operands[0], operands[3], operands[1], operands[2]}; - riscv_vector::expand_reduction (<WREDUC_UNSPEC>, + riscv_vector::expand_reduction (<WREDUC_UNSPEC_AV>, riscv_vector::REDUCE_OP_M, ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; @@ -1317,7 +1317,7 @@ { rtx ops[] = {operands[0], operands[2], operands[1], gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; - riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED_AV, riscv_vector::REDUCE_OP_M_FRM_DYN, ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; @@ -1372,7 +1372,7 @@ [(const_int 0)] { rtx ops[] = {operands[0], operands[3], operands[1], operands[2]}; - riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED_AV, riscv_vector::REDUCE_OP_M_FRM_DYN, ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); DONE; diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 88c0f00e0ea..a49e5f259c2 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2185,7 +2185,7 @@ "&& 1" [(const_int 0)] { - riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_AV, riscv_vector::REDUCE_OP, operands, CONST0_RTX (<VEL>mode)); DONE; } @@ -2198,7 +2198,7 @@ { int prec = GET_MODE_PRECISION (<VEL>mode); rtx min = immed_wide_int_const (wi::min_value (prec, SIGNED), <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MAX_AV, riscv_vector::REDUCE_OP, operands, min); DONE; }) @@ -2208,7 +2208,7 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_reduction (UNSPEC_REDUC_MAXU, riscv_vector::REDUCE_OP, + 
riscv_vector::expand_reduction (UNSPEC_REDUC_MAXU_AV, riscv_vector::REDUCE_OP, operands, CONST0_RTX (<VEL>mode)); DONE; }) @@ -2220,7 +2220,7 @@ { int prec = GET_MODE_PRECISION (<VEL>mode); rtx max = immed_wide_int_const (wi::max_value (prec, SIGNED), <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN_AV, riscv_vector::REDUCE_OP, operands, max); DONE; }) @@ -2232,7 +2232,7 @@ { int prec = GET_MODE_PRECISION (<VEL>mode); rtx max = immed_wide_int_const (wi::max_value (prec, UNSIGNED), <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MINU, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MINU_AV, riscv_vector::REDUCE_OP, operands, max); DONE; }) @@ -2242,7 +2242,7 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_reduction (UNSPEC_REDUC_AND, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_AND_AV, riscv_vector::REDUCE_OP, operands, CONSTM1_RTX (<VEL>mode)); DONE; }) @@ -2252,7 +2252,7 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_reduction (UNSPEC_REDUC_OR, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_OR_AV, riscv_vector::REDUCE_OP, operands, CONST0_RTX (<VEL>mode)); DONE; }) @@ -2262,7 +2262,7 @@ (match_operand:V_VLSI 1 "register_operand")] "TARGET_VECTOR" { - riscv_vector::expand_reduction (UNSPEC_REDUC_XOR, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_XOR_AV, riscv_vector::REDUCE_OP, operands, CONST0_RTX (<VEL>mode)); DONE; }) @@ -2286,7 +2286,7 @@ "&& 1" [(const_int 0)] { - riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_UNORDERED, + riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_UNORDERED_AV, riscv_vector::REDUCE_OP_FRM_DYN, operands, CONST0_RTX (<VEL>mode)); DONE; @@ -2301,7 +2301,7 @@ REAL_VALUE_TYPE rv; real_inf (&rv, true); rtx f = const_double_from_real_value (rv, <VEL>mode); - 
riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MAX_AV, riscv_vector::REDUCE_OP, operands, f); DONE; }) @@ -2314,7 +2314,7 @@ REAL_VALUE_TYPE rv; real_inf (&rv, false); rtx f = const_double_from_real_value (rv, <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN_AV, riscv_vector::REDUCE_OP, operands, f); DONE; }) @@ -2327,7 +2327,7 @@ REAL_VALUE_TYPE rv; real_inf (&rv, true); rtx f = const_double_from_real_value (rv, <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MAX_AV, riscv_vector::REDUCE_OP, operands, f); DONE; }) @@ -2340,7 +2340,7 @@ REAL_VALUE_TYPE rv; real_inf (&rv, false); rtx f = const_double_from_real_value (rv, <VEL>mode); - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN_AV, riscv_vector::REDUCE_OP, operands, f); DONE; }) @@ -2365,7 +2365,7 @@ [(const_int 0)] { rtx ops[] = {operands[0], operands[2]}; - riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, + riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED_AV, riscv_vector::REDUCE_OP_FRM_DYN, ops, operands[1]); DONE; @@ -2392,7 +2392,7 @@ else { rtx ops[] = {operands[0], operands[2], operands[3], operands[4]}; - riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, + riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED_AV, riscv_vector::REDUCE_OP_M_FRM_DYN, ops, operands[1]); } diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index ef48d790034..78c4c294094 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4516,13 +4516,13 @@ expand_reduction (unsigned unspec, unsigned insn_flags, rtx *ops, rtx init) rtx scalar_move_ops[] = {m1_tmp, init}; insn_code icode = code_for_pred_broadcast (m1_mode); if 
(need_mask_operand_p (insn_flags)) - emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, ops[3]); + emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, gen_int_mode (1, Pmode)); else emit_vlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops); rtx m1_tmp2 = gen_reg_rtx (m1_mode); rtx reduc_ops[] = {m1_tmp2, vector_src, m1_tmp}; - icode = code_for_pred (unspec, vmode); + icode = code_for_pred_av (unspec, vmode); if (need_mask_operand_p (insn_flags)) { diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index dec5964edf2..772be1a7799 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -88,8 +88,11 @@ ;; Integer and Float Reduction UNSPEC_REDUC UNSPEC_REDUC_SUM + UNSPEC_REDUC_SUM_AV UNSPEC_REDUC_SUM_ORDERED UNSPEC_REDUC_SUM_UNORDERED + UNSPEC_REDUC_SUM_ORDERED_AV + UNSPEC_REDUC_SUM_UNORDERED_AV UNSPEC_REDUC_MAXU UNSPEC_REDUC_MAX UNSPEC_REDUC_MINU @@ -97,11 +100,22 @@ UNSPEC_REDUC_AND UNSPEC_REDUC_OR UNSPEC_REDUC_XOR + UNSPEC_REDUC_MAXU_AV + UNSPEC_REDUC_MAX_AV + UNSPEC_REDUC_MINU_AV + UNSPEC_REDUC_MIN_AV + UNSPEC_REDUC_AND_AV + UNSPEC_REDUC_OR_AV + UNSPEC_REDUC_XOR_AV UNSPEC_WREDUC_SUM UNSPEC_WREDUC_SUMU + UNSPEC_WREDUC_SUM_AV + UNSPEC_WREDUC_SUMU_AV UNSPEC_WREDUC_SUM_ORDERED UNSPEC_WREDUC_SUM_UNORDERED + UNSPEC_WREDUC_SUM_ORDERED_AV + UNSPEC_WREDUC_SUM_UNORDERED_AV UNSPEC_SELECT_MASK UNSPEC_SF_VFNRCLIP @@ -1665,32 +1679,60 @@ UNSPEC_REDUC_MIN UNSPEC_REDUC_AND UNSPEC_REDUC_OR UNSPEC_REDUC_XOR ]) +(define_int_iterator ANY_REDUC_AV [ + UNSPEC_REDUC_SUM_AV UNSPEC_REDUC_MAXU_AV UNSPEC_REDUC_MAX_AV UNSPEC_REDUC_MINU_AV + UNSPEC_REDUC_MIN_AV UNSPEC_REDUC_AND_AV UNSPEC_REDUC_OR_AV UNSPEC_REDUC_XOR_AV +]) + (define_int_iterator ANY_WREDUC [ UNSPEC_WREDUC_SUM UNSPEC_WREDUC_SUMU ]) +(define_int_iterator ANY_WREDUC_AV [ + UNSPEC_WREDUC_SUM_AV UNSPEC_WREDUC_SUMU_AV +]) + (define_int_iterator ANY_FREDUC [ UNSPEC_REDUC_MAX UNSPEC_REDUC_MIN ]) +(define_int_iterator ANY_FREDUC_AV [ + 
UNSPEC_REDUC_MAX_AV UNSPEC_REDUC_MIN_AV +]) + (define_int_iterator ANY_FREDUC_SUM [ UNSPEC_REDUC_SUM_ORDERED UNSPEC_REDUC_SUM_UNORDERED ]) +(define_int_iterator ANY_FREDUC_SUM_AV [ + UNSPEC_REDUC_SUM_ORDERED_AV UNSPEC_REDUC_SUM_UNORDERED_AV +]) + (define_int_iterator ANY_FWREDUC_SUM [ UNSPEC_WREDUC_SUM_ORDERED UNSPEC_WREDUC_SUM_UNORDERED ]) +(define_int_iterator ANY_FWREDUC_SUM_AV [ + UNSPEC_WREDUC_SUM_ORDERED_AV UNSPEC_WREDUC_SUM_UNORDERED_AV +]) + (define_int_attr reduc_op [ (UNSPEC_REDUC_SUM "redsum") + (UNSPEC_REDUC_SUM_AV "redsum") (UNSPEC_REDUC_SUM_ORDERED "redosum") (UNSPEC_REDUC_SUM_UNORDERED "redusum") + (UNSPEC_REDUC_SUM_ORDERED_AV "redosum") (UNSPEC_REDUC_SUM_UNORDERED_AV "redusum") (UNSPEC_REDUC_MAXU "redmaxu") (UNSPEC_REDUC_MAX "redmax") (UNSPEC_REDUC_MINU "redminu") (UNSPEC_REDUC_MIN "redmin") + (UNSPEC_REDUC_MAXU_AV "redmaxu") (UNSPEC_REDUC_MAX_AV "redmax") (UNSPEC_REDUC_MINU_AV "redminu") (UNSPEC_REDUC_MIN_AV "redmin") (UNSPEC_REDUC_AND "redand") (UNSPEC_REDUC_OR "redor") (UNSPEC_REDUC_XOR "redxor") + (UNSPEC_REDUC_AND_AV "redand") (UNSPEC_REDUC_OR_AV "redor") (UNSPEC_REDUC_XOR_AV "redxor") (UNSPEC_WREDUC_SUM "wredsum") (UNSPEC_WREDUC_SUMU "wredsumu") + (UNSPEC_WREDUC_SUM_AV "wredsum") (UNSPEC_WREDUC_SUMU_AV "wredsumu") (UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED "wredusum") + (UNSPEC_WREDUC_SUM_ORDERED_AV "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED_AV "wredusum") ]) (define_code_attr WREDUC_UNSPEC [(zero_extend "UNSPEC_WREDUC_SUMU") (sign_extend "UNSPEC_WREDUC_SUM")]) +(define_code_attr WREDUC_UNSPEC_AV [(zero_extend "UNSPEC_WREDUC_SUMU_AV") (sign_extend "UNSPEC_WREDUC_SUM_AV")]) (define_mode_attr VINDEX [ (RVVM8QI "RVVM8QI") (RVVM4QI "RVVM4QI") (RVVM2QI "RVVM2QI") (RVVM1QI "RVVM1QI") @@ -3930,6 +3972,8 @@ (UNSPEC_ORDERED "o") (UNSPEC_UNORDERED "u") (UNSPEC_REDUC_SUM_ORDERED "o") (UNSPEC_REDUC_SUM_UNORDERED "u") (UNSPEC_WREDUC_SUM_ORDERED "o") (UNSPEC_WREDUC_SUM_UNORDERED "u") + (UNSPEC_REDUC_SUM_ORDERED_AV "o") 
(UNSPEC_REDUC_SUM_UNORDERED_AV "u") + (UNSPEC_WREDUC_SUM_ORDERED_AV "o") (UNSPEC_WREDUC_SUM_UNORDERED_AV "u") ]) (define_int_attr v_su [(UNSPEC_VMULHS "") (UNSPEC_VMULHU "u") (UNSPEC_VMULHSU "su") diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index ff8f552b802..1d8a48b9399 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -7745,6 +7745,11 @@ ;; - 14.3 Vector Single-Width Floating-Point Reduction Instructions ;; - 14.4 Vector Widening Floating-Point Reduction Instructions ;; ------------------------------------------------------------------------------- +;; +;; NOTE for _av variant reduction: +;; The _av variant is used by the auto vectorizer to generate vectorized code +;; only, because the auto vectorizer expect reduction should propgat the start +;; value to dest even VL=0, the only way is force vd=vs1 by constraint. ;; Integer Reduction (vred(sum|maxu|max|minu|min|and|or|xor).vs) (define_insn "@pred_<reduc_op><mode>" @@ -7767,6 +7772,28 @@ [(set_attr "type" "vired") (set_attr "mode" "<MODE>")]) +;; Integer Reduction (vred(sum|maxu|max|minu|min|and|or|xor).vs) +;; but for auto vectorizer (see "NOTE for _av variant reduction" for detail) +(define_insn "@pred_av_<reduc_op><mode>" + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_LMUL1> [ + (match_operand:V_VLSI 3 "register_operand" " vr") + (match_operand:<V_LMUL1> 4 "register_operand" " 0") + ] ANY_REDUC_AV) + (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "v<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "vired") + (set_attr "mode" "<MODE>")]) + ;; Integer Widen Reduction Sum (vwredsum[u].vs) 
(define_insn "@pred_<reduc_op><mode>" [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr, vr") @@ -7788,6 +7815,28 @@ [(set_attr "type" "viwred") (set_attr "mode" "<MODE>")]) +;; Integer Widen Reduction Sum (vwredsum[u].vs) +;; but for auto vectorizer (see "NOTE for _av variant reduction" for detail) +(define_insn "@pred_av_<reduc_op><mode>" + [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_EXT_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_EXT_LMUL1> [ + (match_operand:VI_QHS 3 "register_operand" " vr") + (match_operand:<V_EXT_LMUL1> 4 "register_operand" " 0") + ] ANY_WREDUC_AV) + (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "v<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "viwred") + (set_attr "mode" "<MODE>")]) + ;; Float Reduction (vfred(max|min).vs) (define_insn "@pred_<reduc_op><mode>" [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr, vr") @@ -7809,6 +7858,28 @@ [(set_attr "type" "vfredu") (set_attr "mode" "<MODE>")]) +;; Float Reduction (vfred(max|min).vs) +;; but for auto vectorizer (see "NOTE for _av variant reduction" for detail) +(define_insn "@pred_av_<reduc_op><mode>" + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_LMUL1> [ + (match_operand:V_VLSF 3 "register_operand" " vr") + (match_operand:<V_LMUL1> 4 "register_operand" " 0") + ] ANY_FREDUC_AV) + (match_operand:<V_LMUL1> 2 
"vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "vf<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfredu") + (set_attr "mode" "<MODE>")]) + ;; Float Reduction Sum (vfred[ou]sum.vs) (define_insn "@pred_<reduc_op><mode>" [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr,vr") @@ -7834,6 +7905,32 @@ (set (attr "frm_mode") (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) +;; Float Reduction Sum (vfred[ou]sum.vs) +;; but for auto vectorizer (see "NOTE for _av variant reduction" for detail) +(define_insn "@pred_av_<reduc_op><mode>" + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_LMUL1> [ + (match_operand:V_VLSF 3 "register_operand" " vr") + (match_operand:<V_LMUL1> 4 "register_operand" " 0") + ] ANY_FREDUC_SUM_AV) + (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "vf<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfred<order>") + (set_attr "mode" "<MODE>") + (set (attr "frm_mode") + (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) + ;; Float Widen Reduction Sum (vfwred[ou]sum.vs) (define_insn "@pred_<reduc_op><mode>" [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr, vr") @@ -7859,6 +7956,32 @@ (set (attr "frm_mode") (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) +;; Float Widen Reduction Sum (vfwred[ou]sum.vs) +;; but for auto vectorizer (see "NOTE for _av variant reduction" for detail) +(define_insn "@pred_av_<reduc_op><mode>" + [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr") + (unspec:<V_EXT_LMUL1> + [(unspec:<VM> + [(match_operand:<VM> 1 
"vector_mask_operand" "vmWc1") + (match_operand 5 "vector_length_operand" " rK") + (match_operand 6 "const_int_operand" " i") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) + (unspec:<V_EXT_LMUL1> [ + (match_operand:VF_HS 3 "register_operand" " vr") + (match_operand:<V_EXT_LMUL1> 4 "register_operand" " 0") + ] ANY_FWREDUC_SUM_AV) + (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))] + "TARGET_VECTOR" + "vf<reduc_op>.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfwred<order>") + (set_attr "mode" "<MODE>") + (set (attr "frm_mode") + (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) + ;; ------------------------------------------------------------------------------- ;; ---- Predicated permutation operations ;; ------------------------------------------------------------------------------- diff --git a/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f b/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f new file mode 100644 index 00000000000..7ecbfeb863b --- /dev/null +++ b/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f @@ -0,0 +1,63 @@ +! { dg-do run } +! { dg-options "-fno-vect-cost-model" } + + program dqnorm_calculator + implicit none + + ! Declare variables + integer, parameter :: nx = 33, ny = 33, nz =16 + real(8) :: dq(5, nx, ny, nz) + real(8) :: result, expected_result, tolerance + integer :: i, j, k, l + + ! Initialize the dq array with values calculated as k + j + i + 5 + do k = 1, nz + do j = 1, ny + do i = 1, nx + do l = 1, 5 + dq(l, i, j, k) = k + j + i + 5 + end do + end do + end do + end do + + ! Call the subroutine to calculate the norm + call redsum(dq, nx, ny, nz, result) + + ! 
Check the result + expected_result = 214213560.0d0 + tolerance = 0.0001d0 + if (abs(result - expected_result) > tolerance) then + print *, "Result is incorrect: ", result + call abort() + end if + end + + subroutine redsum(dq, nx, ny, nz, result) + implicit none + + ! Declare arguments and local variables + integer, intent(in) :: nx, ny, nz + real(8), intent(in) :: dq(5, nx, ny, nz) + real(8), intent(out) :: result + real(8) :: dqnorm + integer :: i, j, k, l + + ! Initialize dqnorm + dqnorm = 0.0d0 + + ! Compute the sum of squares of dq elements + do k = 1, nz + do j = 1, ny + do i = 1, nx + do l = 1, 5 + dqnorm = dqnorm + dq(l, i, j, k) * dq(l, i, j, k) + end do + end do + end do + end do + + result = dqnorm + + end subroutine redsum + -- 2.34.1