This is a slightly amended patch that fixes the modes and the instruction type
attribute. Here is the relevant snippet:
diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md
index d4335dc04ba..67f4d9ce3a8 100644
--- gcc/config/riscv/autovec-opt.md
+++ gcc/config/riscv/autovec-opt.md
@@ -2146,3 +2146,26 @@ (define_insn_and_split "*v<ieee_fmaxmin_op>_vf_<mode>"
}
[(set_attr "type" "vfminmax")]
)
+
+;; vfwmul.vf
+(define_insn_and_split "*vfwmul_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (mult:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand")))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_dual_widen_scalar (MULT,
+ <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+
+ DONE;
+ }
+ [(set_attr "type" "vfwmul")]
+)
diff --git gcc/config/riscv/vector.md gcc/config/riscv/vector.md
index 2b35d66b611..187d207318c 100644
--- gcc/config/riscv/vector.md
+++ gcc/config/riscv/vector.md
@@ -7296,8 +7296,8 @@ (define_insn "@pred_dual_widen_<optab><mode>_scalar"
(any_widen_binop:VWEXTF
(float_extend:VWEXTF
(match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" " vr, vr"))
- (float_extend:VWEXTF
- (vec_duplicate:<V_DOUBLE_TRUNC>
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
(match_operand:<VSUBEL> 4 "register_operand" " f, f"))))
(match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0")))]
"TARGET_VECTOR"
Thanks,
PA
On 03/09/2025 16:52, Paul-Antoine Arras wrote:
This pattern enables the combine pass (or late-combine, depending on the case)
to merge a float_extend'ed vec_duplicate into a mult RTL instruction.
Before this patch, we have six instructions, e.g.:
fcvt.d.s fa0,fa0
vsetvli a5,zero,e64,m1,ta,ma
vfmv.v.f v3,fa0
vfwcvt.f.f.v v1,v2
vsetvli zero,zero,e64,m1,ta,ma
vfmul.vv v1,v3,v1
After, we get only one:
vfwmul.vf v1,v2,fa0
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*vfwmul_vf_<mode>): New pattern to
combine float_extend + vec_duplicate + vfmul.vv into vfwmul.vf.
* config/riscv/vector.md (@pred_dual_widen_<optab><mode>_scalar):
Swap the nesting of float_extend and vec_duplicate on the scalar
operand to match the RTL emitted by expand, i.e. first float_extend
then vec_duplicate.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwmul.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h: Add support for
widening variants.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h: New test
helper.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c: New test.
---
gcc/config/riscv/autovec-opt.md | 23 +++++++++++++
gcc/config/riscv/vector.md | 4 +--
.../riscv/rvv/autovec/vx_vf/vf-1-f16.c | 2 ++
.../riscv/rvv/autovec/vx_vf/vf-1-f32.c | 2 ++
.../riscv/rvv/autovec/vx_vf/vf-2-f16.c | 3 +-
.../riscv/rvv/autovec/vx_vf/vf-2-f32.c | 3 +-
.../riscv/rvv/autovec/vx_vf/vf-3-f16.c | 2 ++
.../riscv/rvv/autovec/vx_vf/vf-3-f32.c | 2 ++
.../riscv/rvv/autovec/vx_vf/vf-4-f16.c | 1 +
.../riscv/rvv/autovec/vx_vf/vf-4-f32.c | 1 +
.../riscv/rvv/autovec/vx_vf/vf_binop.h | 34 +++++++++++++++++--
.../rvv/autovec/vx_vf/vf_binop_widen_run.h | 32 +++++++++++++++++
.../rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c | 20 +++++++++++
.../rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c | 16 +++++++++
14 files changed, 139 insertions(+), 6 deletions(-)
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c
diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md
index d4335dc04ba..2c595c0bdb2 100644
--- gcc/config/riscv/autovec-opt.md
+++ gcc/config/riscv/autovec-opt.md
@@ -2146,3 +2146,26 @@ (define_insn_and_split "*v<ieee_fmaxmin_op>_vf_<mode>"
}
[(set_attr "type" "vfminmax")]
)
+
+;; vfwmul.vf
+(define_insn_and_split "*vfwmul_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (mult:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand")))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_dual_widen_scalar (MULT,
+ <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
diff --git gcc/config/riscv/vector.md gcc/config/riscv/vector.md
index 2b35d66b611..11f5d9dc874 100644
--- gcc/config/riscv/vector.md
+++ gcc/config/riscv/vector.md
@@ -7296,8 +7296,8 @@ (define_insn "@pred_dual_widen_<optab><mode>_scalar"
(any_widen_binop:VWEXTF
(float_extend:VWEXTF
(match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" " vr,
vr"))
- (float_extend:VWEXTF
- (vec_duplicate:<V_DOUBLE_TRUNC>
+ (vec_duplicate:<V_DOUBLE_TRUNC>
+ (float_extend:VWEXTF
(match_operand:<VSUBEL> 4 "register_operand" " f,
f"))))
(match_operand:VWEXTF 2 "vector_merge_operand" " vu,
0")))]
"TARGET_VECTOR"
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
index 0be64f1fd64..cbec87e6c0b 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
@@ -22,6 +22,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MIN_FUNC_0_WRAP
(_Float16), min)
DEF_VF_BINOP_CASE_2_WRAP (_Float16, MIN_FUNC_1_WRAP (_Float16), min)
DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max)
DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max)
+DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul)
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -39,3 +40,4 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP
(_Float16), max)
/* { dg-final { scan-assembler-times {vfrdiv.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmin.vf} 2 } } */
/* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
+/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
index a9cd38aebeb..b6d94c650b0 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
@@ -22,6 +22,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MIN_FUNC_0_WRAP (float), min)
DEF_VF_BINOP_CASE_2_WRAP (float, MIN_FUNC_1_WRAP (float), min)
DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP (float), max)
DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
+DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul)
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -39,3 +40,4 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
/* { dg-final { scan-assembler-times {vfrdiv.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmin.vf} 2 } } */
/* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
+/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
index 0db3048688c..3f31568825e 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
@@ -19,4 +19,5 @@
/* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.s.h} 4 } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.s.h} 5 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
index 494b33e45b2..21a3e1d16aa 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
@@ -19,4 +19,5 @@
/* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.d.s} 4 } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.d.s} 5 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
index c2c4f430b15..522b5d29464 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
@@ -26,6 +26,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_0_WRAP
(_Float16), max,
VF_BINOP_FUNC_BODY_X128)
DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max,
VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul)
/* { dg-final { scan-assembler {vfmadd.vf} } } */
/* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -43,3 +44,4 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP
(_Float16), max,
/* { dg-final { scan-assembler {vfrdiv.vf} } } */
/* { dg-final { scan-assembler {vfmin.vf} } } */
/* { dg-final { scan-assembler {vfmax.vf} } } */
+/* { dg-final { scan-assembler {vfwmul.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
index f2582cad8b3..8d05c54c772 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
@@ -26,6 +26,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_0_WRAP (float), max,
VF_BINOP_FUNC_BODY_X128)
DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul)
/* { dg-final { scan-assembler {vfmadd.vf} } } */
/* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -43,3 +44,4 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
/* { dg-final { scan-assembler {vfrdiv.vf} } } */
/* { dg-final { scan-assembler {vfmin.vf} } } */
/* { dg-final { scan-assembler {vfmax.vf} } } */
+/* { dg-final { scan-assembler {vfwmul.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
index 3fa31504cfe..3058367785e 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
@@ -19,4 +19,5 @@
/* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
/* { dg-final { scan-assembler {fcvt.s.h} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
index 3d526b56e01..d687f8f6f62 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
@@ -19,4 +19,5 @@
/* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
/* { dg-final { scan-assembler {fcvt.d.s} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
index da02065dda8..c9253e9867a 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
@@ -31,6 +31,21 @@
#define RUN_VF_BINOP_REVERSE_CASE_0_WRAP(T, NAME, out, in, f, n)
\
RUN_VF_BINOP_REVERSE_CASE_0 (T, NAME, out, in, f, n)
+#define DEF_VF_BINOP_WIDEN_CASE_0(T1, T2, OP, NAME) \
+ void test_vf_binop_widen_##NAME##_##T1##_case_0 (T2 *restrict out,
\
+ T1 *restrict in, T1 f, \
+ unsigned n) \
+ {
\
+ for (unsigned i = 0; i < n; i++)
\
+ out[i] = (T2) f * (T2) in[i];
\
+ }
+#define DEF_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NAME)
\
+ DEF_VF_BINOP_WIDEN_CASE_0 (T1, T2, OP, NAME)
+#define RUN_VF_BINOP_WIDEN_CASE_0(T1, T2, NAME, out, in, f, n)
\
+ test_vf_binop_widen_##NAME##_##T1##_case_0 (out, in, f, n)
+#define RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
\
+ RUN_VF_BINOP_WIDEN_CASE_0 (T1, T2, NAME, out, in, f, n)
+
#define VF_BINOP_BODY(op)
\
out[k + 0] = in[k + 0] op tmp;
\
out[k + 1] = in[k + 1] op tmp;
\
@@ -122,8 +137,6 @@
#define DEF_VF_BINOP_REVERSE_CASE_1_WRAP(T, OP, NAME, BODY)
\
DEF_VF_BINOP_REVERSE_CASE_1 (T, OP, NAME, BODY)
-#endif
-
#define DEF_MIN_0(T)
\
static inline T test_##T##_min_0 (T a, T b) { return a > b ? b : a; }
@@ -224,3 +237,20 @@ DEF_MAX_1 (double)
#define VF_BINOP_FUNC_BODY_X128(op)
\
VF_BINOP_FUNC_BODY_X64 (op)
\
VF_BINOP_FUNC_BODY_X64 (op)
+
+#define DEF_VF_BINOP_WIDEN_CASE_1(TYPE1, TYPE2, OP, NAME)
\
+ void test_vf_binop_widen_##NAME##_##TYPE1##_##TYPE2##_case_1 (
\
+ TYPE2 *__restrict dst, TYPE2 *__restrict dst2, TYPE2 *__restrict dst3,
\
+ TYPE2 *__restrict dst4, TYPE1 *__restrict a, TYPE1 *__restrict b,
\
+ TYPE1 *__restrict a2, TYPE1 *__restrict b2, int n)
\
+ {
\
+ for (int i = 0; i < n; i++)
\
+ {
\
+ dst[i] = (TYPE2) * a OP (TYPE2) b[i]; \
+ dst2[i] = (TYPE2) * a2 OP (TYPE2) b[i]; \
+ dst3[i] = (TYPE2) * a2 OP (TYPE2) a[i]; \
+ dst4[i] = (TYPE2) * a OP (TYPE2) b2[i]; \
+ }
\
+ }
+
+#endif
diff --git
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
new file mode 100644
index 00000000000..929b2dec85d
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
@@ -0,0 +1,32 @@
+#ifndef HAVE_DEFINED_VF_BINOP_WIDEN_RUN_H
+#define HAVE_DEFINED_VF_BINOP_WIDEN_RUN_H
+
+#include <assert.h>
+
+#define N 512
+
+int main ()
+{
+ T1 f;
+ T1 in[N];
+ T2 out[N];
+ T2 out2[N];
+
+ f = LIMIT % 8723;
+ for (int i = 0; i < N; i++)
+ {
+ in[i] = LIMIT + i & 1964;
+ out[i] = LIMIT + i & 628;
+ out2[i] = LIMIT + i & 628;
+ asm volatile ("" ::: "memory");
+ }
+
+ TEST_RUN (T1, T2, NAME, out, in, f, N);
+
+ for (int i = 0; i < N; i++)
+ assert (out[i] == ((T2) f OP (T2) in[i]));
+
+ return 0;
+}
+
+#endif
diff --git
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c
new file mode 100644
index 00000000000..1ba84e59b01
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1 _Float16
+#define T2 float
+#define NAME mul
+#define OP *
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n)
RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_binop_widen_run.h"
diff --git
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c
new file mode 100644
index 00000000000..52fbeb377eb
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1 float
+#define T2 double
+#define NAME mul
+#define OP *
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n)
RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_binop_widen_run.h"
--
PA
commit 522fa9cf4282cb20ba495610c615a721abc2f9f2
Author: Paul-Antoine Arras <par...@baylibre.com>
Date: Wed Sep 3 14:29:13 2025 +0200
RISC-V: Add pattern for vector-scalar widening floating-point multiply
This pattern enables the combine pass (or late-combine, depending on the case)
to merge a float_extend'ed vec_duplicate into a mult RTL instruction.
Before this patch, we have six instructions, e.g.:
fcvt.d.s fa0,fa0
vsetvli a5,zero,e64,m1,ta,ma
vfmv.v.f v3,fa0
vfwcvt.f.f.v v1,v2
vsetvli zero,zero,e64,m1,ta,ma
vfmul.vv v1,v3,v1
After, we get only one:
vfwmul.vf v1,v2,fa0
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*vfwmul_vf_<mode>): New pattern to
combine float_extend + vec_duplicate + vfmul.vv into vfwmul.vf.
* config/riscv/vector.md (@pred_dual_widen_<optab><mode>_scalar):
Swap the nesting of float_extend and vec_duplicate on the scalar
operand to match the RTL emitted by expand, i.e. first float_extend
then vec_duplicate.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwmul.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h: Add support for
widening variants.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h: New test
helper.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c: New test.
diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md
index d4335dc04ba..67f4d9ce3a8 100644
--- gcc/config/riscv/autovec-opt.md
+++ gcc/config/riscv/autovec-opt.md
@@ -2146,3 +2146,26 @@ (define_insn_and_split "*v<ieee_fmaxmin_op>_vf_<mode>"
}
[(set_attr "type" "vfminmax")]
)
+
+;; vfwmul.vf
+(define_insn_and_split "*vfwmul_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (mult:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand")))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_dual_widen_scalar (MULT,
+ <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+
+ DONE;
+ }
+ [(set_attr "type" "vfwmul")]
+)
diff --git gcc/config/riscv/vector.md gcc/config/riscv/vector.md
index 2b35d66b611..187d207318c 100644
--- gcc/config/riscv/vector.md
+++ gcc/config/riscv/vector.md
@@ -7296,8 +7296,8 @@ (define_insn "@pred_dual_widen_<optab><mode>_scalar"
(any_widen_binop:VWEXTF
(float_extend:VWEXTF
(match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" " vr, vr"))
- (float_extend:VWEXTF
- (vec_duplicate:<V_DOUBLE_TRUNC>
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
(match_operand:<VSUBEL> 4 "register_operand" " f, f"))))
(match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0")))]
"TARGET_VECTOR"
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
index 0be64f1fd64..cbec87e6c0b 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
@@ -22,6 +22,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MIN_FUNC_0_WRAP (_Float16), min)
DEF_VF_BINOP_CASE_2_WRAP (_Float16, MIN_FUNC_1_WRAP (_Float16), min)
DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max)
DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max)
+DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul)
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -39,3 +40,4 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max)
/* { dg-final { scan-assembler-times {vfrdiv.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmin.vf} 2 } } */
/* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
+/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
index a9cd38aebeb..b6d94c650b0 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
@@ -22,6 +22,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MIN_FUNC_0_WRAP (float), min)
DEF_VF_BINOP_CASE_2_WRAP (float, MIN_FUNC_1_WRAP (float), min)
DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP (float), max)
DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
+DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul)
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -39,3 +40,4 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
/* { dg-final { scan-assembler-times {vfrdiv.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfmin.vf} 2 } } */
/* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
+/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
index 0db3048688c..3f31568825e 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
@@ -19,4 +19,5 @@
/* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.s.h} 4 } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.s.h} 5 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
index 494b33e45b2..21a3e1d16aa 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
@@ -19,4 +19,5 @@
/* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.d.s} 4 } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
+/* { dg-final { scan-assembler-times {fcvt.d.s} 5 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
index c2c4f430b15..522b5d29464 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
@@ -26,6 +26,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max,
VF_BINOP_FUNC_BODY_X128)
DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max,
VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul)
/* { dg-final { scan-assembler {vfmadd.vf} } } */
/* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -43,3 +44,4 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max,
/* { dg-final { scan-assembler {vfrdiv.vf} } } */
/* { dg-final { scan-assembler {vfmin.vf} } } */
/* { dg-final { scan-assembler {vfmax.vf} } } */
+/* { dg-final { scan-assembler {vfwmul.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
index f2582cad8b3..8d05c54c772 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
@@ -26,6 +26,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_0_WRAP (float), max,
VF_BINOP_FUNC_BODY_X128)
DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
VF_BINOP_FUNC_BODY_X128)
+DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul)
/* { dg-final { scan-assembler {vfmadd.vf} } } */
/* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -43,3 +44,4 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
/* { dg-final { scan-assembler {vfrdiv.vf} } } */
/* { dg-final { scan-assembler {vfmin.vf} } } */
/* { dg-final { scan-assembler {vfmax.vf} } } */
+/* { dg-final { scan-assembler {vfwmul.vf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
index 3fa31504cfe..3058367785e 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
@@ -19,4 +19,5 @@
/* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
/* { dg-final { scan-assembler {fcvt.s.h} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
index 3d526b56e01..d687f8f6f62 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
@@ -19,4 +19,5 @@
/* { dg-final { scan-assembler-not {vfrdiv.vf} } } */
/* { dg-final { scan-assembler-not {vfmin.vf} } } */
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
+/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
/* { dg-final { scan-assembler {fcvt.d.s} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
index da02065dda8..c9253e9867a 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
@@ -31,6 +31,21 @@
#define RUN_VF_BINOP_REVERSE_CASE_0_WRAP(T, NAME, out, in, f, n) \
RUN_VF_BINOP_REVERSE_CASE_0 (T, NAME, out, in, f, n)
+#define DEF_VF_BINOP_WIDEN_CASE_0(T1, T2, OP, NAME) \
+ void test_vf_binop_widen_##NAME##_##T1##_case_0 (T2 *restrict out, \
+ T1 *restrict in, T1 f, \
+ unsigned n) \
+ { \
+ for (unsigned i = 0; i < n; i++) \
+ out[i] = (T2) f * (T2) in[i]; \
+ }
+#define DEF_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NAME) \
+ DEF_VF_BINOP_WIDEN_CASE_0 (T1, T2, OP, NAME)
+#define RUN_VF_BINOP_WIDEN_CASE_0(T1, T2, NAME, out, in, f, n) \
+ test_vf_binop_widen_##NAME##_##T1##_case_0 (out, in, f, n)
+#define RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n) \
+ RUN_VF_BINOP_WIDEN_CASE_0 (T1, T2, NAME, out, in, f, n)
+
#define VF_BINOP_BODY(op) \
out[k + 0] = in[k + 0] op tmp; \
out[k + 1] = in[k + 1] op tmp; \
@@ -122,8 +137,6 @@
#define DEF_VF_BINOP_REVERSE_CASE_1_WRAP(T, OP, NAME, BODY) \
DEF_VF_BINOP_REVERSE_CASE_1 (T, OP, NAME, BODY)
-#endif
-
#define DEF_MIN_0(T) \
static inline T test_##T##_min_0 (T a, T b) { return a > b ? b : a; }
@@ -224,3 +237,20 @@ DEF_MAX_1 (double)
#define VF_BINOP_FUNC_BODY_X128(op) \
VF_BINOP_FUNC_BODY_X64 (op) \
VF_BINOP_FUNC_BODY_X64 (op)
+
+#define DEF_VF_BINOP_WIDEN_CASE_1(TYPE1, TYPE2, OP, NAME) \
+ void test_vf_binop_widen_##NAME##_##TYPE1##_##TYPE2##_case_1 ( \
+ TYPE2 *__restrict dst, TYPE2 *__restrict dst2, TYPE2 *__restrict dst3, \
+ TYPE2 *__restrict dst4, TYPE1 *__restrict a, TYPE1 *__restrict b, \
+ TYPE1 *__restrict a2, TYPE1 *__restrict b2, int n) \
+ { \
+ for (int i = 0; i < n; i++) \
+ { \
+ dst[i] = (TYPE2) * a OP (TYPE2) b[i]; \
+ dst2[i] = (TYPE2) * a2 OP (TYPE2) b[i]; \
+ dst3[i] = (TYPE2) * a2 OP (TYPE2) a[i]; \
+ dst4[i] = (TYPE2) * a OP (TYPE2) b2[i]; \
+ } \
+ }
+
+#endif
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
new file mode 100644
index 00000000000..929b2dec85d
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
@@ -0,0 +1,32 @@
+#ifndef HAVE_DEFINED_VF_BINOP_WIDEN_RUN_H
+#define HAVE_DEFINED_VF_BINOP_WIDEN_RUN_H
+
+#include <assert.h>
+
+#define N 512
+
+int main ()
+{
+ T1 f;
+ T1 in[N];
+ T2 out[N];
+ T2 out2[N];
+
+ f = LIMIT % 8723;
+ for (int i = 0; i < N; i++)
+ {
+ in[i] = LIMIT + i & 1964;
+ out[i] = LIMIT + i & 628;
+ out2[i] = LIMIT + i & 628;
+ asm volatile ("" ::: "memory");
+ }
+
+ TEST_RUN (T1, T2, NAME, out, in, f, N);
+
+ for (int i = 0; i < N; i++)
+ assert (out[i] == ((T2) f OP (T2) in[i]));
+
+ return 0;
+}
+
+#endif
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c
new file mode 100644
index 00000000000..1ba84e59b01
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f16.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1 _Float16
+#define T2 float
+#define NAME mul
+#define OP *
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_binop_widen_run.h"
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c
new file mode 100644
index 00000000000..52fbeb377eb
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwmul-run-1-f32.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1 float
+#define T2 double
+#define NAME mul
+#define OP *
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_binop_widen_run.h"