Consider the following intrinsic code:
void rvv_dot_prod(int16_t *pSrcA, int16_t *pSrcB, uint32_t n, int64_t *result) { size_t vl; vint16m4_t vSrcA, vSrcB; vint64m1_t vSum = __riscv_vmv_s_x_i64m1(0, 1); while (n > 0) { vl = __riscv_vsetvl_e16m4(n); vSrcA = __riscv_vle16_v_i16m4(pSrcA, vl); vSrcB = __riscv_vle16_v_i16m4(pSrcB, vl); vSum = __riscv_vwredsum_vs_i32m8_i64m1(__riscv_vwmul_vv_i32m8(vSrcA, vSrcB, vl), vSum, vl); pSrcA += vl; pSrcB += vl; n -= vl; } *result = __riscv_vmv_x_s_i64m1_i64(vSum); } https://godbolt.org/z/vWd35W7G6 Before this patch: ... Loop: ... vmv1r.v v2,v1 ... vwredsum.vs v1,v8,v2 ... After this patch: ... Loop: ... vwredsum.vs v1,v8,v1 ... PR target/112327 gcc/ChangeLog: * config/riscv/vector.md: Add '0'. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112327-1.c: New test. * gcc.target/riscv/rvv/base/pr112327-2.c: New test. --- gcc/config/riscv/vector.md | 4 +-- .../gcc.target/riscv/rvv/base/pr112327-1.c | 27 +++++++++++++++++++ .../gcc.target/riscv/rvv/base/pr112327-2.c | 27 +++++++++++++++++++ 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 0297e4f0227..3577971fa33 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -7765,7 +7765,7 @@ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:<V_EXT_LMUL1> [ (match_operand:VI_QHS 3 "register_operand" " vr, vr") - (match_operand:<V_EXT_LMUL1> 4 "register_operand" " vr, vr") + (match_operand:<V_EXT_LMUL1> 4 "register_operand" " vr0, vr0") ] ANY_WREDUC) (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))] "TARGET_VECTOR" @@ -7834,7 +7834,7 @@ (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) (unspec:<V_EXT_LMUL1> [ (match_operand:VF_HS 3 "register_operand" " vr, vr") - (match_operand:<V_EXT_LMUL1> 4 "register_operand" " vr, vr") + (match_operand:<V_EXT_LMUL1> 4 
"register_operand" " vr0, vr0") ] ANY_FWREDUC_SUM) (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))] "TARGET_VECTOR" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c new file mode 100644 index 00000000000..20da23976f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +void +foo (int16_t *pSrcA, int16_t *pSrcB, uint32_t n, int64_t *result) +{ + size_t vl; + vint16m4_t vSrcA, vSrcB; + vint64m1_t vSum = __riscv_vmv_s_x_i64m1 (0, 1); + while (n > 0) + { + vl = __riscv_vsetvl_e16m4 (n); + vSrcA = __riscv_vle16_v_i16m4 (pSrcA, vl); + vSrcB = __riscv_vle16_v_i16m4 (pSrcB, vl); + vSum = __riscv_vwredsum_vs_i32m8_i64m1 ( + __riscv_vwmul_vv_i32m8 (vSrcA, vSrcB, vl), vSum, vl); + pSrcA += vl; + pSrcB += vl; + n -= vl; + } + *result = __riscv_vmv_x_s_i64m1_i64 (vSum); +} + +/* { dg-final { scan-assembler-not {vmv1r} } } */ +/* { dg-final { scan-assembler-not {vmv\.v\.v} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c new file mode 100644 index 00000000000..5ffde000fbd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zfh -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +void +foo (_Float16 *pSrcA, _Float16 *pSrcB, uint32_t n, double *result) +{ + size_t vl; + vfloat16m4_t vSrcA, vSrcB; + vfloat64m1_t vSum = __riscv_vfmv_s_f_f64m1 (0, 1); + while (n > 0) + { + vl = __riscv_vsetvl_e16m4 (n); + vSrcA = __riscv_vle16_v_f16m4 (pSrcA, vl); + vSrcB = __riscv_vle16_v_f16m4 (pSrcB, vl); + vSum = __riscv_vfwredusum_vs_f32m8_f64m1 ( + __riscv_vfwmul_vv_f32m8 (vSrcA, vSrcB, vl), vSum, vl); + pSrcA += vl; + pSrcB += vl; + n -= vl; + } + *result = 
__riscv_vfmv_f_s_f64m1_f64 (vSum); +} + +/* { dg-final { scan-assembler-not {vmv1r} } } */ +/* { dg-final { scan-assembler-not {vmv\.v\.v} } } */ -- 2.36.3