On 6/4/25 11:34, Paul-Antoine Arras wrote: > Hi Vineet, > > On 04/06/2025 20:29, Vineet Gupta wrote: >> Hi Paul, >> >> On 5/30/25 03:04, Paul-Antoine Arras wrote: >>> This pattern enables the combine pass (or late-combine, depending on the >>> case) >>> to merge a vec_duplicate into a plus-mult or minus-mult RTL instruction. >>> >>> Before this patch, we have two instructions, e.g.: >>> vfmv.v.f v6,fa0 >>> vfmadd.vv v9,v6,v7 >>> >>> After, we get only one: >>> vfmadd.vf v9,fa0,v7 >> This patch seems to cause a functional regression. I'm seeing SPEC2017 526 >> blender output mismatch failures on QEMU icounts test harness >> >> running compare cmd 1 >> cpu2017-1.1.7/bin/specperl cpu2017-1.1.7/bin/harness/specdiff -m -l 10 >> --reltol 0.05 >> >> cpu2017-1.1.7/benchspec/CPU/526.blender_r/data/refrate/output/imagevalidate_sh3_no_char_0849.out >> imagevalidate_sh3_no_char_0849.out >> >> exited with status 1 > I have not tried to reproduce it yet but it likely comes from a sign > mistake in the handling of vfmsub. Does the attached patch fix it for you?
Thanks, I can try it. Out of curiosity: are both of the new patterns part of the sign fix? If not, which minimal hunk is the actual fix? -Vineet > > Thanks, > > PA > >>> On SPEC2017's 503.bwaves_r, depending on the workload, the reduction in >>> dynamic >>> instruction count varies from -4.66% to -4.75%. >>> >>> PR target/119100 >>> >>> gcc/ChangeLog: >>> >>> * config/riscv/autovec-opt.md (*<optab>_vf_<mode>): Add new pattern to >>> combine vec_duplicate + vfm{add,sub}.vv into vfm{add,sub}.vf. >>> * config/riscv/riscv-opts.h (FPR2VR_COST_UNPROVIDED): Define. >>> * config/riscv/riscv-protos.h (get_fr2vr_cost): Declare function. >>> * config/riscv/riscv.cc (riscv_rtx_costs): Add cost model for MULT with >>> VEC_DUPLICATE. >>> (get_fr2vr_cost): New function. >>> * config/riscv/riscv.opt: Add new option --param=fpr2vr-cost. >>> >>> gcc/testsuite/ChangeLog: >>> >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_data.h: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_run.h: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c: New test. 
>>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c: New test. >>> * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c: New test. >>> --- >>> gcc/config/riscv/autovec-opt.md | 52 +++ >>> gcc/config/riscv/riscv-opts.h | 1 + >>> gcc/config/riscv/riscv-protos.h | 1 + >>> gcc/config/riscv/riscv.cc | 39 +- >>> gcc/config/riscv/riscv.opt | 4 + >>> .../riscv/rvv/autovec/vx_vf/vf-1-f16.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-1-f32.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-1-f64.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-2-f16.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-2-f32.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-2-f64.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-3-f16.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-3-f32.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-3-f64.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-4-f16.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-4-f32.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf-4-f64.c | 10 + >>> .../riscv/rvv/autovec/vx_vf/vf_mulop.h | 61 +++ >>> .../riscv/rvv/autovec/vx_vf/vf_mulop_data.h | 413 ++++++++++++++++++ >>> .../riscv/rvv/autovec/vx_vf/vf_mulop_run.h | 34 ++ >>> .../rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c | 15 + >>> .../rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c | 15 + >>> .../rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c | 15 + >>> .../rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c | 15 + >>> .../rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c | 15 + >>> .../rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c | 15 + >>> 26 files changed, 811 insertions(+), 4 deletions(-) >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c >>> create mode 100644 >>> 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_data.h >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_run.h >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c >>> create mode 100644 >>> gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c >>> >>> diff --git a/gcc/config/riscv/autovec-opt.md >>> b/gcc/config/riscv/autovec-opt.md >>> index a972eda8de4..19eb16c7540 100644 >>> --- a/gcc/config/riscv/autovec-opt.md >>> +++ b/gcc/config/riscv/autovec-opt.md >>> @@ -1713,3 +1713,55 @@ (define_insn_and_split "*<optab>_vx_<mode>" >>> <MODE>mode); >>> } >>> [(set_attr "type" "vialu")]) >>> + >>> +;; >>> 
============================================================================= >>> +;; Combine vec_duplicate + op.vv to op.vf >>> +;; Include >>> +;; - vfmadd.vf >>> +;; - vfmsub.vf >>> +;; >>> ============================================================================= >>> + >>> + >>> +(define_insn_and_split "*<optab>_vf_<mode>" >>> + [(set (match_operand:V_VLSF 0 "register_operand" "=vd") >>> + (plus_minus:V_VLSF >>> + (mult:V_VLSF >>> + (vec_duplicate:V_VLSF >>> + (match_operand:<VEL> 1 "register_operand" " f")) >>> + (match_operand:V_VLSF 2 "register_operand" " 0")) >>> + (match_operand:V_VLSF 3 "register_operand" " vr")))] >>> + "TARGET_VECTOR && can_create_pseudo_p ()" >>> + "#" >>> + "&& 1" >>> + [(const_int 0)] >>> + { >>> + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], >>> + operands[2]}; >>> + riscv_vector::emit_vlmax_insn (code_for_pred_mul_scalar (<CODE>, >>> <MODE>mode), >>> + riscv_vector::TERNARY_OP_FRM_DYN, ops); >>> + DONE; >>> + } >>> + [(set_attr "type" "vfmuladd")] >>> +) >>> + >>> +(define_insn_and_split "*<optab>_vf_<mode>" >>> + [(set (match_operand:V_VLSF 0 "register_operand" "=vd") >>> + (plus_minus:V_VLSF >>> + (match_operand:V_VLSF 3 "register_operand" " vr") >>> + (mult:V_VLSF >>> + (vec_duplicate:V_VLSF >>> + (match_operand:<VEL> 1 "register_operand" " f")) >>> + (match_operand:V_VLSF 2 "register_operand" " 0"))))] >>> + "TARGET_VECTOR && can_create_pseudo_p ()" >>> + "#" >>> + "&& 1" >>> + [(const_int 0)] >>> + { >>> + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], >>> + operands[2]}; >>> + riscv_vector::emit_vlmax_insn (code_for_pred_mul_scalar (<CODE>, >>> <MODE>mode), >>> + riscv_vector::TERNARY_OP_FRM_DYN, ops); >>> + DONE; >>> + } >>> + [(set_attr "type" "vfmuladd")] >>> +) >>> diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h >>> index c02c599ed39..e1a820bb50e 100644 >>> --- a/gcc/config/riscv/riscv-opts.h >>> +++ b/gcc/config/riscv/riscv-opts.h >>> @@ -164,6 +164,7 @@ 
enum riscv_tls_type { >>> (TARGET_VECTOR && riscv_mautovec_segment) >>> >>> #define GPR2VR_COST_UNPROVIDED -1 >>> +#define FPR2VR_COST_UNPROVIDED -1 >>> >>> /* Extra extension flags, used for carry extra info for a RISC-V >>> extension. */ >>> enum >>> diff --git a/gcc/config/riscv/riscv-protos.h >>> b/gcc/config/riscv/riscv-protos.h >>> index d8c8f6b5079..a0331204479 100644 >>> --- a/gcc/config/riscv/riscv-protos.h >>> +++ b/gcc/config/riscv/riscv-protos.h >>> @@ -841,6 +841,7 @@ const struct riscv_tune_info * >>> riscv_parse_tune (const char *, bool); >>> const cpu_vector_cost *get_vector_costs (); >>> int get_gr2vr_cost (); >>> +int get_fr2vr_cost (); >>> >>> enum >>> { >>> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc >>> index 2cc69b4458a..d99249f70e8 100644 >>> --- a/gcc/config/riscv/riscv.cc >>> +++ b/gcc/config/riscv/riscv.cc >>> @@ -3904,6 +3904,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int >>> outer_code, int opno ATTRIBUTE_UN >>> if (riscv_v_ext_mode_p (mode)) >>> { >>> int gr2vr_cost = get_gr2vr_cost (); >>> + int fr2vr_cost = get_fr2vr_cost (); >>> >>> switch (outer_code) >>> { >>> @@ -3922,14 +3923,29 @@ riscv_rtx_costs (rtx x, machine_mode mode, int >>> outer_code, int opno ATTRIBUTE_UN >>> { >>> rtx op_0 = XEXP (x, 0); >>> rtx op_1 = XEXP (x, 1); >>> + rtx op; >>> >>> if (GET_CODE (op_0) == VEC_DUPLICATE >>> || GET_CODE (op_1) == VEC_DUPLICATE) >>> - *total = (gr2vr_cost + 1) * COSTS_N_INSNS (1); >>> - else >>> - *total = COSTS_N_INSNS (1); >>> + { >>> + *total = (gr2vr_cost + 1) * COSTS_N_INSNS (1); >>> + break; >>> + } >>> + else if (GET_CODE (op = op_0) == MULT >>> + || GET_CODE (op = op_1) == MULT) >>> + { >>> + rtx mult_op0 = XEXP (op, 0); >>> + if (GET_CODE (mult_op0) == VEC_DUPLICATE) >>> + { >>> + if (FLOAT_MODE_P (mode)) >>> + *total = (fr2vr_cost + 1) * COSTS_N_INSNS (1); >>> + else >>> + *total = (gr2vr_cost + 1) * COSTS_N_INSNS (1); >>> + break; >>> + } >>> + } >>> } >>> - break; >>> + /* Fall through. 
*/ >>> default: >>> *total = COSTS_N_INSNS (1); >>> break; >>> @@ -12646,6 +12662,21 @@ get_gr2vr_cost () >>> return cost; >>> } >>> >>> +/* Return the cost of moving data from floating-point to vector register. >>> + It will take the value of --param=fpr2vr-cost if it is provided. >>> + Otherwise the default regmove->FR2VR will be returned. */ >>> + >>> +int >>> +get_fr2vr_cost () >>> +{ >>> + int cost = get_vector_costs ()->regmove->FR2VR; >>> + >>> + if (fpr2vr_cost != FPR2VR_COST_UNPROVIDED) >>> + cost = fpr2vr_cost; >>> + >>> + return cost; >>> +} >>> + >>> /* Implement targetm.vectorize.builtin_vectorization_cost. */ >>> >>> static int >>> diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt >>> index b2b9d3311f4..6543fd1c4a7 100644 >>> --- a/gcc/config/riscv/riscv.opt >>> +++ b/gcc/config/riscv/riscv.opt >>> @@ -286,6 +286,10 @@ Max number of bytes to compare as part of inlined >>> strcmp/strncmp routines (defau >>> Target RejectNegative Joined UInteger Var(gpr2vr_cost) >>> Init(GPR2VR_COST_UNPROVIDED) >>> Set the cost value of the rvv instruction when operate from GPR to VR. >>> >>> +-param=fpr2vr-cost= >>> +Target RejectNegative Joined UInteger Var(fpr2vr_cost) >>> Init(FPR2VR_COST_UNPROVIDED) >>> +Set the cost value of the rvv instruction when operate from FPR to VR. >>> + >>> -param=riscv-autovec-mode= >>> Target Undocumented RejectNegative Joined Var(riscv_autovec_mode) Save >>> Set the only autovec mode to try. 
>>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c >>> new file mode 100644 >>> index 00000000000..821e5c589a4 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d --param=fpr2vr-cost=0" } >>> */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_0(_Float16, +, add) >>> +DEF_VF_MULOP_CASE_0(_Float16, -, sub) >>> + >>> +/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ >>> +/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c >>> new file mode 100644 >>> index 00000000000..49b42879a51 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_0(float, +, add) >>> +DEF_VF_MULOP_CASE_0(float, -, sub) >>> + >>> +/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ >>> +/* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c >>> new file mode 100644 >>> index 00000000000..2bb5d891237 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f64.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_0(double, +, add) >>> +DEF_VF_MULOP_CASE_0(double, -, sub) >>> + >>> +/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ >>> +/* { dg-final { 
scan-assembler-times {vfmsub.vf} 1 } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c >>> new file mode 100644 >>> index 00000000000..cbb43cabe98 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d --param=fpr2vr-cost=1" } >>> */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_0(_Float16, +, add) >>> +DEF_VF_MULOP_CASE_0(_Float16, -, sub) >>> + >>> +/* { dg-final { scan-assembler-not {vfmadd.vf} } } */ >>> +/* { dg-final { scan-assembler-not {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c >>> new file mode 100644 >>> index 00000000000..66ff9b8c75e >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=1" } */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_0(float, +, add) >>> +DEF_VF_MULOP_CASE_0(float, -, sub) >>> + >>> +/* { dg-final { scan-assembler-not {vfmadd.vf} } } */ >>> +/* { dg-final { scan-assembler-not {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c >>> new file mode 100644 >>> index 00000000000..66ff9b8c75e >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f64.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=1" } */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_0(float, +, add) >>> +DEF_VF_MULOP_CASE_0(float, -, sub) >>> + >>> +/* { dg-final { scan-assembler-not {vfmadd.vf} } } */ >>> +/* { dg-final 
{ scan-assembler-not {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c >>> new file mode 100644 >>> index 00000000000..45980f49693 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d --param=fpr2vr-cost=0" } >>> */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_1(_Float16, +, add, VF_MULOP_BODY_X16) >>> +DEF_VF_MULOP_CASE_1(_Float16, -, sub, VF_MULOP_BODY_X16) >>> + >>> +/* { dg-final { scan-assembler {vfmadd.vf} } } */ >>> +/* { dg-final { scan-assembler {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c >>> new file mode 100644 >>> index 00000000000..c853620bb13 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_1(float, +, add, VF_MULOP_BODY_X16) >>> +DEF_VF_MULOP_CASE_1(float, -, sub, VF_MULOP_BODY_X16) >>> + >>> +/* { dg-final { scan-assembler {vfmadd.vf} } } */ >>> +/* { dg-final { scan-assembler {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c >>> new file mode 100644 >>> index 00000000000..d38ae8b3220 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f64.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_1(double, +, add, VF_MULOP_BODY_X16) >>> +DEF_VF_MULOP_CASE_1(double, -, sub, 
VF_MULOP_BODY_X16) >>> + >>> +/* { dg-final { scan-assembler {vfmadd.vf} } } */ >>> +/* { dg-final { scan-assembler {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c >>> new file mode 100644 >>> index 00000000000..f1ca34e6d56 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d --param=fpr2vr-cost=4" } >>> */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_1(_Float16, +, add, VF_MULOP_BODY_X16) >>> +DEF_VF_MULOP_CASE_1(_Float16, -, sub, VF_MULOP_BODY_X16) >>> + >>> +/* { dg-final { scan-assembler-not {vfmadd.vf} } } */ >>> +/* { dg-final { scan-assembler-not {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c >>> new file mode 100644 >>> index 00000000000..6730d4b154d >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=4" } */ >>> + >>> +#include "vf_mulop.h" >>> + >>> +DEF_VF_MULOP_CASE_1(float, +, add, VF_MULOP_BODY_X16) >>> +DEF_VF_MULOP_CASE_1(float, -, sub, VF_MULOP_BODY_X16) >>> + >>> +/* { dg-final { scan-assembler-not {vfmadd.vf} } } */ >>> +/* { dg-final { scan-assembler-not {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c >>> new file mode 100644 >>> index 00000000000..bcb6a6e5696 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f64.c >>> @@ -0,0 +1,10 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=fpr2vr-cost=4" } */ >>> + >>> +#include "vf_mulop.h" 
>>> + >>> +DEF_VF_MULOP_CASE_1(double, +, add, VF_MULOP_BODY_X16) >>> +DEF_VF_MULOP_CASE_1(double, -, sub, VF_MULOP_BODY_X16) >>> + >>> +/* { dg-final { scan-assembler-not {vfmadd.vf} } } */ >>> +/* { dg-final { scan-assembler-not {vfmsub.vf} } } */ >>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h >>> new file mode 100644 >>> index 00000000000..52539788906 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop.h >>> @@ -0,0 +1,61 @@ >>> +#ifndef HAVE_DEFINED_VF_MULOP_H >>> +#define HAVE_DEFINED_VF_MULOP_H >>> + >>> +#include <stdint.h> >>> + >>> +#define DEF_VF_MULOP_CASE_0(T, OP, NAME) >>> \ >>> + void test_vf_mulop_##NAME##_##T##_case_0(T *restrict out, T *restrict >>> in, \ >>> + T x, unsigned n) { >>> \ >>> + for (unsigned i = 0; i < n; i++) >>> \ >>> + out[i] = in[i] OP out[i] * x; >>> \ >>> + } >>> +#define DEF_VF_MULOP_CASE_0_WRAP(T, OP, NAME) DEF_VF_MULOP_CASE_0(T, OP, >>> NAME) >>> +#define RUN_VF_MULOP_CASE_0(T, NAME, out, in, x, n) >>> \ >>> + test_vf_mulop_##NAME##_##T##_case_0(out, in, x, n) >>> +#define RUN_VF_MULOP_CASE_0_WRAP(T, NAME, out, in, x, n) >>> \ >>> + RUN_VF_MULOP_CASE_0(T, NAME, out, in, x, n) >>> + >>> +#define VF_MULOP_BODY(op) >>> \ >>> + out[k + 0] = in[k + 0] op tmp * out[k + 0]; >>> \ >>> + out[k + 1] = in[k + 1] op tmp * out[k + 1]; >>> \ >>> + k += 2; >>> + >>> +#define VF_MULOP_BODY_X4(op) >>> \ >>> + VF_MULOP_BODY(op) >>> \ >>> + VF_MULOP_BODY(op) >>> + >>> +#define VF_MULOP_BODY_X8(op) >>> \ >>> + VF_MULOP_BODY_X4(op) >>> \ >>> + VF_MULOP_BODY_X4(op) >>> + >>> +#define VF_MULOP_BODY_X16(op) >>> \ >>> + VF_MULOP_BODY_X8(op) >>> \ >>> + VF_MULOP_BODY_X8(op) >>> + >>> +#define VF_MULOP_BODY_X32(op) >>> \ >>> + VF_MULOP_BODY_X16(op) >>> \ >>> + VF_MULOP_BODY_X16(op) >>> + >>> +#define VF_MULOP_BODY_X64(op) >>> \ >>> + VF_MULOP_BODY_X32(op) >>> \ >>> + VF_MULOP_BODY_X32(op) >>> + >>> +#define 
VF_MULOP_BODY_X128(op) >>> \ >>> + VF_MULOP_BODY_X64(op) >>> \ >>> + VF_MULOP_BODY_X64(op) >>> + >>> +#define DEF_VF_MULOP_CASE_1(T, OP, NAME, BODY) >>> \ >>> + void test_vf_mulop_##NAME##_##T##_case_1(T *restrict out, T *restrict >>> in, \ >>> + T x, unsigned n) { >>> \ >>> + unsigned k = 0; >>> \ >>> + T tmp = x + 3; >>> \ >>> + >>> \ >>> + while (k < n) { >>> \ >>> + tmp = tmp * 0x3f; >>> \ >>> + BODY(OP) >>> \ >>> + } >>> \ >>> + } >>> +#define DEF_VF_MULOP_CASE_1_WRAP(T, OP, NAME, BODY) >>> \ >>> + DEF_VF_MULOP_CASE_1(T, OP, NAME, BODY) >>> + >>> +#endif >>> diff --git >>> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_data.h >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_data.h >>> new file mode 100644 >>> index 00000000000..c16c1a971f7 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_data.h >>> @@ -0,0 +1,413 @@ >>> +#ifndef HAVE_DEFINED_VF_MULOP_DATA_H >>> +#define HAVE_DEFINED_VF_MULOP_DATA_H >>> + >>> +#define N 16 >>> + >>> +#define TEST_MULOP_DATA(T, NAME) test_##T##_##NAME##_data >>> +#define TEST_MULOP_DATA_WRAP(T, NAME) TEST_MULOP_DATA(T, NAME) >>> + >>> + >>> +_Float16 TEST_MULOP_DATA(_Float16, add)[][4][N] = >>> +{ >>> + { >>> + { 0.30f16 }, >>> + { >>> + 1.48f16, 1.48f16, 1.48f16, 1.48f16, >>> + 0.80f16, 0.80f16, 0.80f16, 0.80f16, >>> + 0.62f16, 0.62f16, 0.62f16, 0.62f16, >>> + 1.18f16, 1.18f16, 1.18f16, 1.18f16, >>> + }, >>> + { >>> + 1.25f16, 1.25f16, 1.25f16, 1.25f16, >>> + 1.89f16, 1.89f16, 1.89f16, 1.89f16, >>> + 1.57f16, 1.57f16, 1.57f16, 1.57f16, >>> + 1.21f16, 1.21f16, 1.21f16, 1.21f16, >>> + }, >>> + { >>> + 1.85f16, 1.85f16, 1.85f16, 1.85f16, >>> + 1.37f16, 1.37f16, 1.37f16, 1.37f16, >>> + 1.09f16, 1.09f16, 1.09f16, 1.09f16, >>> + 1.54f16, 1.54f16, 1.54f16, 1.54f16, >>> + } >>> + }, >>> + { >>> + { -0.505f16 }, >>> + { >>> + -2.38f16, -2.38f16, -2.38f16, -2.38f16, >>> + -2.06f16, -2.06f16, -2.06f16, -2.06f16, >>> + -1.69f16, -1.69f16, -1.69f16, -1.69f16, >>> + -1.1f16, 
-1.1f16, -1.1f16, -1.1f16, >>> + }, >>> + { >>> + -1.77f16, -1.77f16, -1.77f16, -1.77f16, >>> + -1.6f16, -1.6f16, -1.6f16, -1.6f16, >>> + -1.f16, -1.f16, -1.f16, -1.f16, >>> + -1.23f16, -1.23f16, -1.23f16, -1.23f16, >>> + }, >>> + { >>> + -1.49f16, -1.49f16, -1.49f16, -1.49f16, >>> + -1.25f16, -1.25f16, -1.25f16, -1.25f16, >>> + -1.18f16, -1.18f16, -1.18f16, -1.18f16, >>> + -0.479f16, -0.479f16, -0.479f16, -0.479f16, >>> + } >>> + }, >>> + { >>> + { 4.95e-04f16 }, >>> + { >>> + 1.4266e-05f16, 1.4266e-05f16, 1.4266e-05f16, 1.4266e-05f16, >>> + 1.8129e-05f16, 1.8129e-05f16, 1.8129e-05f16, 1.8129e-05f16, >>> + -8.4710e-06f16, -8.4710e-06f16, -8.4710e-06f16, -8.4710e-06f16, >>> + 3.7876e-05f16, 3.7876e-05f16, 3.7876e-05f16, 3.7876e-05f16, >>> + }, >>> + { >>> + 2.2808e-02f16, 2.2808e-02f16, 2.2808e-02f16, 2.2808e-02f16, >>> + 3.9633e-02f16, 3.9633e-02f16, 3.9633e-02f16, 3.9633e-02f16, >>> + 9.9657e-02f16, 9.9657e-02f16, 9.9657e-02f16, 9.9657e-02f16, >>> + 7.7189e-02f16, 7.7189e-02f16, 7.7189e-02f16, 7.7189e-02f16, >>> + }, >>> + { >>> + 2.5547e-05f16, 2.5547e-05f16, 2.5547e-05f16, 2.5547e-05f16, >>> + 3.7732e-05f16, 3.7732e-05f16, 3.7732e-05f16, 3.7732e-05f16, >>> + 4.0820e-05f16, 4.0820e-05f16, 4.0820e-05f16, 4.0820e-05f16, >>> + 7.6054e-05f16, 7.6054e-05f16, 7.6054e-05f16, 7.6054e-05f16, >>> + } >>> + }, >>> +}; >>> + >>> +float TEST_MULOP_DATA(float, add)[][4][N] = >>> +{ >>> + { >>> + { 43.71f }, >>> + { >>> + -410.28f, -410.28f, -410.28f, -410.28f, >>> + -276.91f, -276.91f, -276.91f, -276.91f, >>> + -103.38f, -103.38f, -103.38f, -103.38f, >>> + -378.24f, -378.24f, -378.24f, -378.24f, >>> + }, >>> + { >>> + 9.56f, 9.56f, 9.56f, 9.56f, >>> + 6.39f, 6.39f, 6.39f, 6.39f, >>> + 2.40f, 2.40f, 2.40f, 2.40f, >>> + 8.80f, 8.80f, 8.80f, 8.80f, >>> + }, >>> + { >>> + 7.59f, 7.59f, 7.59f, 7.59f, >>> + 2.40f, 2.40f, 2.40f, 2.40f, >>> + 1.52f, 1.52f, 1.52f, 1.52f, >>> + 6.41f, 6.41f, 6.41f, 6.41f, >>> + } >>> + }, >>> + { >>> + { 2.04f }, >>> + { >>> + -110.22f, -110.22f, 
-110.22f, -110.22f, >>> + -25.13f, -25.13f, -25.13f, -25.13f, >>> + -108.18f, -108.18f, -108.18f, -108.18f, >>> + -107.14f, -107.14f, -107.14f, -107.14f, >>> + }, >>> + { >>> + 64.82f, 64.82f, 64.82f, 64.82f, >>> + 31.65f, 31.65f, 31.65f, 31.65f, >>> + 87.32f, 87.32f, 87.32f, 87.32f, >>> + 58.70f, 58.70f, 58.70f, 58.70f, >>> + }, >>> + { >>> + 22.01f, 22.01f, 22.01f, 22.01f, >>> + 39.44f, 39.44f, 39.44f, 39.44f, >>> + 69.95f, 69.95f, 69.95f, 69.95f, >>> + 12.61f, 12.61f, 12.61f, 12.61f, >>> + } >>> + }, >>> + { >>> + { 20.35f }, >>> + { >>> + 881.43f, 881.43f, 881.43f, 881.43f, >>> + 3300.17f, 3300.17f, 3300.17f, 3300.17f, >>> + 5217.85f, 5217.85f, 5217.85f, 5217.85f, >>> + 66.57f, 66.57f, 66.57f, 66.57f, >>> + }, >>> + { >>> + 64.82f, 64.82f, 64.82f, 64.82f, >>> + 31.65f, 31.65f, 31.65f, 31.65f, >>> + 87.32f, 87.32f, 87.32f, 87.32f, >>> + 58.70f, 58.70f, 58.70f, 58.70f, >>> + }, >>> + { >>> + 2200.52f, 2200.52f, 2200.52f, 2200.52f, >>> + 3944.25f, 3944.25f, 3944.25f, 3944.25f, >>> + 6994.81f, 6994.81f, 6994.81f, 6994.81f, >>> + 1261.12f, 1261.12f, 1261.12f, 1261.12f, >>> + } >>> + }, >>> +}; >>> + >>> +double TEST_MULOP_DATA(double, add)[][4][N] = >>> +{ >>> + { >>> + { 1.16e+12 }, >>> + { >>> + 1.8757e+45, 1.8757e+45, 1.8757e+45, 1.8757e+45, >>> + 7.5140e+45, 7.5140e+45, 7.5140e+45, 7.5140e+45, >>> + 8.2069e+45, 8.2069e+45, 8.2069e+45, 8.2069e+45, >>> + 4.9456e+45, 4.9456e+45, 4.9456e+45, 4.9456e+45, >>> + }, >>> + { >>> + 9.0242e+32, 9.0242e+32, 9.0242e+32, 9.0242e+32, >>> + 3.6908e+32, 3.6908e+32, 3.6908e+32, 3.6908e+32, >>> + 3.9202e+32, 3.9202e+32, 3.9202e+32, 3.9202e+32, >>> + 5.0276e+32, 5.0276e+32, 5.0276e+32, 5.0276e+32, >>> + }, >>> + { >>> + 2.9201e+45, 2.9201e+45, 2.9201e+45, 2.9201e+45, >>> + 7.9411e+45, 7.9411e+45, 7.9411e+45, 7.9411e+45, >>> + 8.6606e+45, 8.6606e+45, 8.6606e+45, 8.6606e+45, >>> + 5.5275e+45, 5.5275e+45, 5.5275e+45, 5.5275e+45, >>> + } >>> + }, >>> + { >>> + { -7.29e+23 }, >>> + { >>> + -6.4993e+65, -6.4993e+65, -6.4993e+65, 
-6.4993e+65, >>> + -4.6760e+65, -4.6760e+65, -4.6760e+65, -4.6760e+65, >>> + -8.1564e+65, -8.1564e+65, -8.1564e+65, -8.1564e+65, >>> + -8.2899e+65, -8.2899e+65, -8.2899e+65, -8.2899e+65, >>> + }, >>> + { >>> + -7.7764e+41, -7.7764e+41, -7.7764e+41, -7.7764e+41, >>> + -1.9756e+41, -1.9756e+41, -1.9756e+41, -1.9756e+41, >>> + -4.8980e+41, -4.8980e+41, -4.8980e+41, -4.8980e+41, >>> + -8.1062e+41, -8.1062e+41, -8.1062e+41, -8.1062e+41, >>> + }, >>> + { >>> + -8.2928e+64, -8.2928e+64, -8.2928e+64, -8.2928e+64, >>> + -3.2356e+65, -3.2356e+65, -3.2356e+65, -3.2356e+65, >>> + -4.5850e+65, -4.5850e+65, -4.5850e+65, -4.5850e+65, >>> + -2.3794e+65, -2.3794e+65, -2.3794e+65, -2.3794e+65, >>> + } >>> + }, >>> + { >>> + { 2.02e-03 }, >>> + { >>> + -1.2191e-35, -1.2191e-35, -1.2191e-35, -1.2191e-35, >>> + -1.0471e-36, -1.0471e-36, -1.0471e-36, -1.0471e-36, >>> + -9.7582e-36, -9.7582e-36, -9.7582e-36, -9.7582e-36, >>> + -2.2097e-36, -2.2097e-36, -2.2097e-36, -2.2097e-36, >>> + }, >>> + { >>> + 9.7703e-33, 9.7703e-33, 9.7703e-33, 9.7703e-33, >>> + 4.1632e-33, 4.1632e-33, 4.1632e-33, 4.1632e-33, >>> + 8.1964e-33, 8.1964e-33, 8.1964e-33, 8.1964e-33, >>> + 4.7314e-33, 4.7314e-33, 4.7314e-33, 4.7314e-33, >>> + }, >>> + { >>> + 7.5586e-36, 7.5586e-36, 7.5586e-36, 7.5586e-36, >>> + 7.3684e-36, 7.3684e-36, 7.3684e-36, 7.3684e-36, >>> + 6.8101e-36, 6.8101e-36, 6.8101e-36, 6.8101e-36, >>> + 7.3543e-36, 7.3543e-36, 7.3543e-36, 7.3543e-36, >>> + } >>> + }, >>> +}; >>> + >>> +_Float16 TEST_MULOP_DATA(_Float16, sub)[][4][N] = >>> +{ >>> + { >>> + { 0.676f16 }, >>> + { >>> + 1.39f16, 1.39f16, 1.39f16, 1.39f16, >>> + 1.68f16, 1.68f16, 1.68f16, 1.68f16, >>> + 1.63f16, 1.63f16, 1.63f16, 1.63f16, >>> + 2.12f16, 2.12f16, 2.12f16, 2.12f16, >>> + }, >>> + { >>> + 1.04f16, 1.04f16, 1.04f16, 1.04f16, >>> + 1.64f16, 1.64f16, 1.64f16, 1.64f16, >>> + 1.95f16, 1.95f16, 1.95f16, 1.95f16, >>> + 1.39f16, 1.39f16, 1.39f16, 1.39f16, >>> + }, >>> + { >>> + 0.687f16, 0.687f16, 0.687f16, 0.687f16, >>> + 0.568f16, 
0.568f16, 0.568f16, 0.568f16, >>> + 0.315f16, 0.315f16, 0.315f16, 0.315f16, >>> + 1.18f16, 1.18f16, 1.18f16, 1.18f16, >>> + } >>> +}, >>> + { >>> + { -0.324f16 }, >>> + { >>> + -0.679f16, -0.679f16, -0.679f16, -0.679f16, >>> + -0.992f16, -0.992f16, -0.992f16, -0.992f16, >>> + -1.34f16, -1.34f16, -1.34f16, -1.34f16, >>> + -0.297f16, -0.297f16, -0.297f16, -0.297f16, >>> + }, >>> + { >>> + -1.96f16, -1.96f16, -1.96f16, -1.96f16, >>> + -1.36f16, -1.36f16, -1.36f16, -1.36f16, >>> + -1.05f16, -1.05f16, -1.05f16, -1.05f16, >>> + -1.61f16, -1.61f16, -1.61f16, -1.61f16, >>> + }, >>> + { >>> + -1.31f16, -1.31f16, -1.31f16, -1.31f16, >>> + -1.43f16, -1.43f16, -1.43f16, -1.43f16, >>> + -1.68f16, -1.68f16, -1.68f16, -1.68f16, >>> + -0.82f16, -0.82f16, -0.82f16, -0.82f16, >>> + } >>> + }, >>> + { >>> + { 7.08e+01f16 }, >>> + { >>> + 4.49e+03f16, 4.49e+03f16, 4.49e+03f16, 4.49e+03f16, >>> + 7.73e+03f16, 7.73e+03f16, 7.73e+03f16, 7.73e+03f16, >>> + 8.42e+03f16, 8.42e+03f16, 8.42e+03f16, 8.42e+03f16, >>> + 9.12e+03f16, 9.12e+03f16, 9.12e+03f16, 9.12e+03f16, >>> + }, >>> + { >>> + 1.40e+01f16, 1.40e+01f16, 1.40e+01f16, 1.40e+01f16, >>> + 6.80e+01f16, 6.80e+01f16, 6.80e+01f16, 6.80e+01f16, >>> + 9.54e+01f16, 9.54e+01f16, 9.54e+01f16, 9.54e+01f16, >>> + 4.49e+01f16, 4.49e+01f16, 4.49e+01f16, 4.49e+01f16, >>> + }, >>> + { >>> + 3.50e+03f16, 3.50e+03f16, 3.50e+03f16, 3.50e+03f16, >>> + 2.91e+03f16, 2.91e+03f16, 2.91e+03f16, 2.91e+03f16, >>> + 1.66e+03f16, 1.66e+03f16, 1.66e+03f16, 1.66e+03f16, >>> + 5.94e+03f16, 5.94e+03f16, 5.94e+03f16, 5.94e+03f16, >>> + } >>> + }, >>> +}; >>> + >>> +float TEST_MULOP_DATA(float, sub)[][4][N] = >>> +{ >>> + { >>> + {8.51f }, >>> + { >>> + 24.21f, 24.21f, 24.21f, 24.21f, >>> + 40.31f, 40.31f, 40.31f, 40.31f, >>> + 59.68f, 59.68f, 59.68f, 59.68f, >>> + 45.42f, 45.42f, 45.42f, 45.42f, >>> + }, >>> + { >>> + 1.94f, 1.94f, 1.94f, 1.94f, >>> + 4.24f, 4.24f, 4.24f, 4.24f, >>> + 6.48f, 6.48f, 6.48f, 6.48f, >>> + 4.68f, 4.68f, 4.68f, 4.68f, >>> + }, >>> + { >>> 
+ 7.70f, 7.70f, 7.70f, 7.70f, >>> + 4.23f, 4.23f, 4.23f, 4.23f, >>> + 4.54f, 4.54f, 4.54f, 4.54f, >>> + 5.59f, 5.59f, 5.59f, 5.59f, >>> + }, >>> +}, >>> + { >>> + { 85.14f }, >>> + { >>> + 1731.29f, 1731.29f, 1731.29f, 1731.29f, >>> + 3656.53f, 3656.53f, 3656.53f, 3656.53f, >>> + 5565.07f, 5565.07f, 5565.07f, 5565.07f, >>> + 4042.14f, 4042.14f, 4042.14f, 4042.14f, >>> + }, >>> + { >>> + 19.43f, 19.43f, 19.43f, 19.43f, >>> + 42.45f, 42.45f, 42.45f, 42.45f, >>> + 64.83f, 64.83f, 64.83f, 64.83f, >>> + 46.82f, 46.82f, 46.82f, 46.82f, >>> + }, >>> + { >>> + 77.02f, 77.02f, 77.02f, 77.02f, >>> + 42.34f, 42.34f, 42.34f, 42.34f, >>> + 45.44f, 45.44f, 45.44f, 45.44f, >>> + 55.89f, 55.89f, 55.89f, 55.89f, >>> + } >>> + }, >>> + { >>> + { 99.01f }, >>> + { >>> + 6240.43f, 6240.43f, 6240.43f, 6240.43f, >>> + 2179.23f, 2179.23f, 2179.23f, 2179.23f, >>> + 5346.65f, 5346.65f, 5346.65f, 5346.65f, >>> + 2649.91f, 2649.91f, 2649.91f, 2649.91f, >>> + }, >>> + { >>> + 59.46f, 59.46f, 59.46f, 59.46f, >>> + 16.96f, 16.96f, 16.96f, 16.96f, >>> + 52.55f, 52.55f, 52.55f, 52.55f, >>> + 24.70f, 24.70f, 24.70f, 24.70f, >>> + }, >>> + { >>> + 353.30f, 353.30f, 353.30f, 353.30f, >>> + 500.02f, 500.02f, 500.02f, 500.02f, >>> + 143.67f, 143.67f, 143.67f, 143.67f, >>> + 204.36f, 204.36f, 204.36f, 204.36f, >>> + } >>> + }, >>> +}; >>> + >>> +double TEST_MULOP_DATA(double, sub)[][4][N] = >>> +{ >>> + { >>> + { 80.54 }, >>> + { >>> + 5731.60, 5731.60, 5731.60, 5731.60, >>> + 6682.41, 6682.41, 6682.41, 6682.41, >>> + 7737.53, 7737.53, 7737.53, 7737.53, >>> + 4922.68, 4922.68, 4922.68, 4922.68, >>> + }, >>> + { >>> + 67.14, 67.14, 67.14, 67.14, >>> + 78.23, 78.23, 78.23, 78.23, >>> + 94.35, 94.35, 94.35, 94.35, >>> + 49.68, 49.68, 49.68, 49.68, >>> + }, >>> + { >>> + 324.14, 324.14, 324.14, 324.14, >>> + 381.77, 381.77, 381.77, 381.77, >>> + 138.58, 138.58, 138.58, 138.58, >>> + 921.45, 921.45, 921.45, 921.45, >>> + } >>> + }, >>> + { >>> + { 8.05e+01 }, >>> + { >>> + 8.65e+27, 8.65e+27, 8.65e+27, 
8.65e+27, >>> + 1.01e+28, 1.01e+28, 1.01e+28, 1.01e+28, >>> + 8.99e+27, 8.99e+27, 8.99e+27, 8.99e+27, >>> + 1.32e+28, 1.32e+28, 1.32e+28, 1.32e+28, >>> + }, >>> + { >>> + 6.71e+25, 6.71e+25, 6.71e+25, 6.71e+25, >>> + 7.82e+25, 7.82e+25, 7.82e+25, 7.82e+25, >>> + 9.44e+25, 9.44e+25, 9.44e+25, 9.44e+25, >>> + 4.97e+25, 4.97e+25, 4.97e+25, 4.97e+25, >>> + }, >>> + { >>> + 3.24e+27, 3.24e+27, 3.24e+27, 3.24e+27, >>> + 3.82e+27, 3.82e+27, 3.82e+27, 3.82e+27, >>> + 1.39e+27, 1.39e+27, 1.39e+27, 1.39e+27, >>> + 9.21e+27, 9.21e+27, 9.21e+27, 9.21e+27, >>> + } >>> + }, >>> + { >>> + { 2.02e-03 }, >>> + { >>> + 2.7308e-35, 2.7308e-35, 2.7308e-35, 2.7308e-35, >>> + 1.5784e-35, 1.5784e-35, 1.5784e-35, 1.5784e-35, >>> + 2.3378e-35, 2.3378e-35, 2.3378e-35, 2.3378e-35, >>> + 1.6918e-35, 1.6918e-35, 1.6918e-35, 1.6918e-35, >>> + }, >>> + { >>> + 9.7703e-33, 9.7703e-33, 9.7703e-33, 9.7703e-33, >>> + 4.1632e-33, 4.1632e-33, 4.1632e-33, 4.1632e-33, >>> + 8.1964e-33, 8.1964e-33, 8.1964e-33, 8.1964e-33, >>> + 4.7314e-33, 4.7314e-33, 4.7314e-33, 4.7314e-33, >>> + }, >>> + { >>> + 7.5586e-36, 7.5586e-36, 7.5586e-36, 7.5586e-36, >>> + 7.3684e-36, 7.3684e-36, 7.3684e-36, 7.3684e-36, >>> + 6.8101e-36, 6.8101e-36, 6.8101e-36, 6.8101e-36, >>> + 7.3543e-36, 7.3543e-36, 7.3543e-36, 7.3543e-36, >>> + } >>> + }, >>> +}; >>> + >>> + >>> +#endif >>> diff --git >>> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_run.h >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_run.h >>> new file mode 100644 >>> index 00000000000..bc6f483deed >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_mulop_run.h >>> @@ -0,0 +1,34 @@ >>> +#ifndef HAVE_DEFINED_VF_MULOP_RUN_H >>> +#define HAVE_DEFINED_VF_MULOP_RUN_H >>> + >>> +#include <math.h> >>> + >>> +#define TYPE_FABS(x, T) >>> \ >>> + (__builtin_types_compatible_p (T, double) ? 
fabs (x) : fabsf (x)) >>> + >>> +int >>> +main () >>> +{ >>> + unsigned i, k; >>> + >>> + for (i = 0; i < sizeof (TEST_DATA) / sizeof (TEST_DATA[0]); i++) >>> + { >>> + T x = TEST_DATA[i][0][0]; >>> + T *in = TEST_DATA[i][1]; >>> + T *out = TEST_DATA[i][2]; >>> + T *expect = TEST_DATA[i][3]; >>> + >>> + TEST_RUN (T, NAME, out, in, x, N); >>> + >>> + for (k = 0; k < N; k++) >>> + { >>> + T diff = expect[k] - out[k]; >>> + if (TYPE_FABS (diff, T) > .01 * TYPE_FABS (expect[k], T)) >>> + __builtin_abort (); >>> + } >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +#endif >>> diff --git >>> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c >>> new file mode 100644 >>> index 00000000000..1bcf9e075fe >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f16.c >>> @@ -0,0 +1,15 @@ >>> +/* { dg-do run { target { riscv_v } } } */ >>> +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> +#include "vf_mulop_data.h" >>> + >>> +#define T _Float16 >>> +#define NAME add >>> + >>> +DEF_VF_MULOP_CASE_0_WRAP(T, +, NAME) >>> + >>> +#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME) >>> +#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, >>> out, in, x, n) >>> + >>> +#include "vf_mulop_run.h" >>> diff --git >>> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c >>> new file mode 100644 >>> index 00000000000..199b9adc738 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f32.c >>> @@ -0,0 +1,15 @@ >>> +/* { dg-do run { target { riscv_v } } } */ >>> +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> +#include "vf_mulop_data.h" >>> + >>> +#define T float >>> +#define NAME add >>> + >>> 
+DEF_VF_MULOP_CASE_0_WRAP(T, +, NAME) >>> + >>> +#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME) >>> +#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, >>> out, in, x, n) >>> + >>> +#include "vf_mulop_run.h" >>> diff --git >>> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c >>> new file mode 100644 >>> index 00000000000..3857f586cc9 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmadd-run-1-f64.c >>> @@ -0,0 +1,15 @@ >>> +/* { dg-do run { target { riscv_v } } } */ >>> +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> +#include "vf_mulop_data.h" >>> + >>> +#define T double >>> +#define NAME add >>> + >>> +DEF_VF_MULOP_CASE_0_WRAP(T, +, NAME) >>> + >>> +#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME) >>> +#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, >>> out, in, x, n) >>> + >>> +#include "vf_mulop_run.h" >>> diff --git >>> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c >>> new file mode 100644 >>> index 00000000000..671c7d83d9c >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f16.c >>> @@ -0,0 +1,15 @@ >>> +/* { dg-do run { target { riscv_v } } } */ >>> +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> +#include "vf_mulop_data.h" >>> + >>> +#define T _Float16 >>> +#define NAME sub >>> + >>> +DEF_VF_MULOP_CASE_0_WRAP(T, -, NAME) >>> + >>> +#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME) >>> +#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, >>> out, in, x, n) >>> + >>> +#include "vf_mulop_run.h" >>> diff --git >>> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c >>> 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c >>> new file mode 100644 >>> index 00000000000..f89696373c3 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f32.c >>> @@ -0,0 +1,15 @@ >>> +/* { dg-do run { target { riscv_v } } } */ >>> +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> +#include "vf_mulop_data.h" >>> + >>> +#define T float >>> +#define NAME sub >>> + >>> +DEF_VF_MULOP_CASE_0_WRAP(T, -, NAME) >>> + >>> +#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME) >>> +#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, >>> out, in, x, n) >>> + >>> +#include "vf_mulop_run.h" >>> diff --git >>> a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c >>> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c >>> new file mode 100644 >>> index 00000000000..b42ab1eff7d >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfmsub-run-1-f64.c >>> @@ -0,0 +1,15 @@ >>> +/* { dg-do run { target { riscv_v } } } */ >>> +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ >>> + >>> +#include "vf_mulop.h" >>> +#include "vf_mulop_data.h" >>> + >>> +#define T double >>> +#define NAME sub >>> + >>> +DEF_VF_MULOP_CASE_0_WRAP(T, -, NAME) >>> + >>> +#define TEST_DATA TEST_MULOP_DATA_WRAP(T, NAME) >>> +#define TEST_RUN(T, NAME, out, in, x, n) RUN_VF_MULOP_CASE_0_WRAP(T, NAME, >>> out, in, x, n) >>> + >>> +#include "vf_mulop_run.h" >