From: Pan Li <pan2...@intel.com> After we enabled the late-combine pass after the mode-switching pass, it will try to combine the insn patterns below into a no-op. Aka:
(insn 40 5 41 2 (set (reg:SI 11 a1 [151]) (reg:SI 69 frm)) "pr118103-simple.c":67:15 2712 {frrmsi} (nil)) (insn 41 40 7 2 (set (reg:SI 69 frm) (const_int 2 [0x2])) "pr118103-simple.c":69:8 2710 {fsrmsi_restore} (nil)) (insn 42 10 11 2 (set (reg:SI 69 frm) (reg:SI 11 a1 [151])) "pr118103-simple.c":70:8 2710 {fsrmsi_restore} (nil)) trying to combine definition of r11 in: 40: a1:SI=frm:SI into: 42: frm:SI=a1:SI instruction becomes a no-op: (set (reg:SI 69 frm) (reg:SI 69 frm)) original cost = 4 + 4 (weighted: 8.000000), replacement cost = 2147483647; keeping replacement rescanning insn with uid = 42. updating insn 42 in-place verify found no changes in insn with uid = 42. deleting insn 40 For example, we have code as below: 9 │ int test_exampe () { 10 │ test (); 11 │ 12 │ size_t vl = 4; 13 │ vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl); 14 │ va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); 15 │ va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); 16 │ 17 │ __riscv_vse16_v_f16m1(b, va, vl); 18 │ 19 │ return 0; 20 │ } it will be compiled to: 53 │ main: 54 │ addi sp,sp,-16 55 │ sd ra,8(sp) 56 │ call initialize 57 │ lui a6,%hi(b) 58 │ lui a2,%hi(a) 59 │ addi a3,a6,%lo(b) 60 │ addi a2,a2,%lo(a) 61 │ li a4,4 62 │ .L8: 63 │ fsrmi 2 64 │ vsetvli a5,a4,e16,m1,ta,ma 65 │ vle16.v v1,0(a2) 66 │ slli a1,a5,1 67 │ subw a4,a4,a5 68 │ add a2,a2,a1 69 │ vfnmadd.vv v1,v1,v1 >> The fsrm a0 insn is deleted by late-combine << 70 │ vfmsub.vv v1,v1,v1 71 │ vse16.v v1,0(a3) 72 │ add a3,a3,a1 73 │ bgt a4,zero,.L8 74 │ lh a4,%lo(b)(a6) 75 │ li a5,-20480 76 │ addi a5,a5,-1382 77 │ bne a4,a5,.L14 78 │ ld ra,8(sp) 79 │ li a0,0 80 │ addi sp,sp,16 81 │ jr ra This patch would like to add the FRM register to the global_regs as it is a cooperatively-managed global register. And then the fsrm insn will not be eliminated by late-combine. The related spec17 cam4 failure may also be caused by this issue. The below test suites are passed for this patch. * The rv64gcv full regression test. 
PR target/118103 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_conditional_register_usage): Add the FRM as the global_regs. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr118103-1.c: New test. * gcc.target/riscv/rvv/base/pr118103-run-1.c: New test. Signed-off-by: Pan Li <pan2...@intel.com> --- gcc/config/riscv/riscv.cc | 4 +- .../gcc.target/riscv/rvv/base/pr118103-1.c | 27 ++++++++++ .../riscv/rvv/base/pr118103-run-1.c | 50 +++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 5a3a0504177..fe24376e9c5 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10885,7 +10885,9 @@ riscv_conditional_register_usage (void) call_used_regs[r] = 1; } - if (!TARGET_HARD_FLOAT) + if (TARGET_HARD_FLOAT) + global_regs[FRM_REGNUM] = 1; + else { for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) fixed_regs[regno] = call_used_regs[regno] = 1; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c new file mode 100644 index 00000000000..1afa5d3afb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv_zvfh -mabi=lp64d" } */ + +#include "riscv_vector.h" + +#define N 4 +typedef _Float16 float16_t; +float16_t a[N]; float16_t b[N]; + +extern void test (); + +int test_exampe () { + test (); + + size_t vl = N; + vfloat16m1_t va = __riscv_vle16_v_f16m1(a, vl); + va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); + va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); + + __riscv_vse16_v_f16m1(b, va, vl); + + return 0; +} + +/* { dg-final { scan-assembler-times {frrm\s+[axs][0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {fsrmi\s+[01234]} 1 } } */ 
+/* { dg-final { scan-assembler-times {fsrm\s+[axs][0-9]+} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c new file mode 100644 index 00000000000..709e1cc34a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118103-run-1.c @@ -0,0 +1,50 @@ +/* { dg-do run { target { riscv_zvfh } } } */ +/* { dg-options "-O3" } */ + +#include "riscv_vector.h" +#define N 4 +typedef _Float16 float16_t; +float16_t a[N]; float16_t b[N]; + +void initialize () { + uint16_t tmp_0[N] = {43883, 3213, 238, 275, }; + + for (int i = 0; i < N; ++i) + { + union { float16_t f16; uint16_t u16; } converter; + converter.u16 = tmp_0[i]; + a[i] = converter.f16; + } + + for (int i = 0; i < N; ++i) + b[i] = 0; +} + +void compute () +{ + int avl = N; + float16_t* ptr_a = a; float16_t* ptr_b = b; + + for (size_t vl; avl > 0; avl -= vl) + { + vl = __riscv_vsetvl_e16m1(avl); + vfloat16m1_t va = __riscv_vle16_v_f16m1(ptr_a, vl); + va = __riscv_vfnmadd_vv_f16m1_rm(va, va, va, __RISCV_FRM_RDN, vl); + va = __riscv_vfmsac_vv_f16m1(va, va, va, vl); + __riscv_vse16_v_f16m1(ptr_b, va, vl); + ptr_a += vl; ptr_b += vl; + } +} + +int main () +{ + initialize(); + compute(); + + short *tmp = (short *)b; + + if (*tmp != -21862) + __builtin_abort (); + + return 0; +} -- 2.43.0