For 'wv' instructions, e.g. vwadd.wv vd,vs2,vs1. vs2 has same EEW as vd. vs1 has smaller than vd.
So, vs2 can overlap with vd, but vs1 can only overlap highest-number of vd when LMUL of vs1 is greater than 1. We already have supported overlap for vs1 LMUL >= 1. But I forget vs1 LMUL < 1, vs2 can overlap vd even though vs1 totally can not overlap vd. Consider the reduction auto-vectorization: int64_t reduc_plus_int (int *__restrict a, int n) { int64_t r = 0; for (int i = 0; i < n; ++i) r += a[i]; return r; } When we use --param=riscv-autovec-lmul=m2, the codegen is good to us because we already supported overlap for source EEW32 LMUL1 -> dest EEW64 LMUL2. --param=riscv-autovec-lmul=m2: reduc_plus_int: ble a1,zero,.L4 vsetvli a5,zero,e64,m2,ta,ma vmv.v.i v2,0 .L3: vsetvli a5,a1,e32,m1,tu,ma slli a4,a5,2 sub a1,a1,a5 vle32.v v1,0(a0) add a0,a0,a4 vwadd.wv v2,v2,v1 bne a1,zero,.L3 li a5,0 vsetivli zero,1,e64,m1,ta,ma vmv.s.x v1,a5 vsetvli a5,zero,e64,m2,ta,ma vredsum.vs v2,v2,v1 vmv.x.s a0,v2 ret .L4: li a0,0 ret However, default LMUL (--param=riscv-autovec-lmul=m1) generates redundant vmv1r since it is EEW32 LMUL=MF2 -> EEW64 LMUL = 1 Before this patch: reduc_plus_int: ble a1,zero,.L4 vsetvli a5,zero,e64,m1,ta,ma vmv.v.i v1,0 .L3: vsetvli a5,a1,e32,mf2,tu,ma slli a4,a5,2 sub a1,a1,a5 vle32.v v2,0(a0) vmv1r.v v3,v1 ----> This should be removed. add a0,a0,a4 vwadd.wv v1,v3,v2 ----> vs2 should be v1 bne a1,zero,.L3 li a5,0 vsetivli zero,1,e64,m1,ta,ma vmv.s.x v2,a5 vsetvli a5,zero,e64,m1,ta,ma vredsum.vs v1,v1,v2 vmv.x.s a0,v1 ret .L4: li a0,0 ret After this patch: reduc_plus_int: ble a1,zero,.L4 vsetvli a5,zero,e64,m1,ta,ma vmv.v.i v1,0 .L3: vsetvli a5,a1,e32,mf2,tu,ma slli a4,a5,2 sub a1,a1,a5 vle32.v v2,0(a0) add a0,a0,a4 vwadd.wv v1,v1,v2 bne a1,zero,.L3 li a5,0 vsetivli zero,1,e64,m1,ta,ma vmv.s.x v2,a5 vsetvli a5,zero,e64,m1,ta,ma vredsum.vs v1,v1,v2 vmv.x.s a0,v1 ret .L4: li a0,0 ret PR target/112432 gcc/ChangeLog: * config/riscv/riscv.md (none,W21,W42,W84,W43,W86,W87): Add W0. (none,W21,W42,W84,W43,W86,W87,W0): Ditto. * config/riscv/vector.md: Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112432-42.c: New test. --- gcc/config/riscv/riscv.md | 14 +++- gcc/config/riscv/vector.md | 84 +++++++++---------- .../gcc.target/riscv/rvv/base/pr112432-42.c | 30 +++++++ 3 files changed, 82 insertions(+), 46 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index eed997116b0..ee8b71c22aa 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -503,7 +503,7 @@ ;; Widening instructions have group-overlap constraints. Those are only ;; valid for certain register-group sizes. This attribute marks the ;; alternatives not matching the required register-group size as disabled. -(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87" +(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0" (const_string "none")) (define_attr "group_overlap_valid" "no,yes" @@ -524,9 +524,9 @@ ;; According to RVV ISA: ;; The destination EEW is greater than the source EEW, the source EMUL is at least 1, - ;; and the overlap is in the highest-numbered part of the destination register group - ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not). - ;; So the source operand should have LMUL >= 1. + ;; and the overlap is in the highest-numbered part of the destination register group + ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, v2, or v4 is not). + ;; So the source operand should have LMUL >= 1. (and (eq_attr "group_overlap" "W43") (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 4 && riscv_get_v_regno_alignment (GET_MODE (operands[3])) >= 1")) @@ -536,6 +536,12 @@ (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) != 8 && riscv_get_v_regno_alignment (GET_MODE (operands[3])) >= 1")) (const_string "no") + + ;; W21 supports highest-number overlap for source LMUL = 1. + ;; For 'wv' variant, we can also allow wide source operand overlaps dest operand. + (and (eq_attr "group_overlap" "W0") + (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) > 1")) + (const_string "no") ] (const_string "yes"))) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 7646615b12a..f607d768b26 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3776,48 +3776,48 @@ (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen_sub<any_extend:su><mode>" - [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, ?&vr, ?&vr") (if_then_else:VWEXTI (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (minus:VWEXTI - (match_operand:VWEXTI 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr") + (match_operand:VWEXTI 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, 0, 0, vr, vr") (any_extend:VWEXTI - (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr"))) - (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))] + (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr, vr, vr"))) + (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0, vu, 0")))] "TARGET_VECTOR" "vwsub<any_extend:u>.wv\t%0,%3,%4%p1" [(set_attr "type" "viwalu") (set_attr "mode" "<V_DOUBLE_TRUNC>") - (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) + (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")]) (define_insn "@pred_single_widen_add<any_extend:su><mode>" - [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, ?&vr, ?&vr") (if_then_else:VWEXTI (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (plus:VWEXTI (any_extend:VWEXTI - (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr")) - (match_operand:VWEXTI 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr")) - (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))] + (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr, vr, vr")) + (match_operand:VWEXTI 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, 0, 0, vr, vr")) + (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0, vu, 0")))] "TARGET_VECTOR" "vwadd<any_extend:u>.wv\t%0,%3,%4%p1" [(set_attr "type" "viwalu") (set_attr "mode" "<V_DOUBLE_TRUNC>") - (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) + (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")]) (define_insn "@pred_single_widen_<plus_minus:optab><any_extend:su><mode>_scalar" [(set (match_operand:VWEXTI 0 "register_operand" "=vr, vr") @@ -7056,56 +7056,56 @@ (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen_add<mode>" - [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, ?&vr, ?&vr") (if_then_else:VWEXTF (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 9 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 9 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) (plus:VWEXTF (float_extend:VWEXTF - (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr")) - (match_operand:VWEXTF 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr")) - (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))] + (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr, vr, vr")) + (match_operand:VWEXTF 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, 0, 0, vr, vr")) + (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0, vu, 0")))] "TARGET_VECTOR" "vfwadd.wv\t%0,%3,%4%p1" [(set_attr "type" "vfwalu") (set_attr "mode" "<V_DOUBLE_TRUNC>") (set (attr "frm_mode") (symbol_ref "riscv_vector::get_frm_mode (operands[9])")) - (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) + (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")]) (define_insn "@pred_single_widen_sub<mode>" - [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") + [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, ?&vr, ?&vr") (if_then_else:VWEXTF (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") - (match_operand 9 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 9 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) (minus:VWEXTF - (match_operand:VWEXTF 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr") + (match_operand:VWEXTF 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, 0, 0, vr, vr") (float_extend:VWEXTF - (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr"))) - (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))] + (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr, vr, vr"))) + (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0, vu, 0")))] "TARGET_VECTOR" "vfwsub.wv\t%0,%3,%4%p1" [(set_attr "type" "vfwalu") (set_attr "mode" "<V_DOUBLE_TRUNC>") (set (attr "frm_mode") (symbol_ref "riscv_vector::get_frm_mode (operands[9])")) - (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) + (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")]) (define_insn "@pred_single_widen_<plus_minus:optab><mode>_scalar" [(set (match_operand:VWEXTF 0 "register_operand" "=vr, vr") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c new file mode 100644 index 00000000000..1ee5b20a899 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ffast-math" } */ + +#include <stdint-gcc.h> + +int64_t +reduc_plus_int (int *__restrict a, int n) +{ + int64_t r = 0; + for (int i = 0; i < n; ++i) + r += a[i]; + return r; +} + +double +reduc_plus_float (float *__restrict a, int n) +{ + double r = 0; + for (int i = 0; i < n; ++i) + r += a[i]; + return r; +} + +/* { dg-final { scan-assembler-not {vmv1r} } } */ +/* { dg-final { scan-assembler-not {vmv2r} } } */ +/* { dg-final { scan-assembler-not {vmv4r} } } */ +/* { dg-final { scan-assembler-not {vmv8r} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-assembler-times {vwadd\.wv} 1 } } */ +/* { dg-final { scan-assembler-times {vfwadd\.wv} 1 } } */ -- 2.36.3