LGTM, and OK for GCC 14 as well. By the way, one idea: the scalar could actually be passed via a GPR instead, i.e. move FPR -> GPR and then use vmv.v.x, but that is not a blocking comment for this patch.
钟居哲 <juzhe.zh...@rivai.ai> 於 2024年7月3日 週三 22:18 寫道: > LGTM。 > > ------------------------------ > juzhe.zh...@rivai.ai > > > *From:* pan2.li <pan2...@intel.com> > *Date:* 2024-07-03 22:17 > *To:* gcc-patches <gcc-patches@gcc.gnu.org> > *CC:* juzhe.zhong <juzhe.zh...@rivai.ai>; kito.cheng > <kito.ch...@gmail.com>; jeffreyalaw <jeffreya...@gmail.com>; rdapp.gcc > <rdapp....@gmail.com>; Pan Li <pan2...@intel.com> > *Subject:* [PATCH v1] RISC-V: Bugfix vfmv insn honor zvfhmin for FP16 SEW > [PR115763] > From: Pan Li <pan2...@intel.com> > > According to the ISA, the zvfhmin sub extension should only contain > convertion insn. Thus, the vfmv insn acts on FP16 should not be > present when only the zvfhmin option is given. > > This patch would like to fix it by split the pred_broadcast define_insn > into zvfhmin and zvfh part. Given below example: > > void test (_Float16 *dest, _Float16 bias) { > dest[0] = bias; > dest[1] = bias; > } > > when compile with -march=rv64gcv_zfh_zvfhmin > > Before this patch: > test: > vsetivli zero,2,e16,mf4,ta,ma > vfmv.v.f v1,fa0 // should not leverage vfmv for zvfhmin > vse16.v v1,0(a0) > ret > > After this patch: > test: > addi sp,sp,-16 > fsh fa0,14(sp) > addi a5,sp,14 > vsetivli zero,2,e16,mf4,ta,ma > vlse16.v v1,0(a5),zero > vse16.v v1,0(a0) > addi sp,sp,16 > jr ra > > PR target/115763 > > gcc/ChangeLog: > > * config/riscv/vector.md (*pred_broadcast<mode>): Split into > zvfh and zvfhmin part. > (*pred_broadcast<mode>_zvfh): New define_insn for zvfh part. > (*pred_broadcast<mode>_zvfhmin): Ditto but for zvfhmin. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/scalar_move-5.c: Adjust asm check. > * gcc.target/riscv/rvv/base/scalar_move-6.c: Ditto. > * gcc.target/riscv/rvv/base/scalar_move-7.c: Ditto. > * gcc.target/riscv/rvv/base/scalar_move-8.c: Ditto. > * gcc.target/riscv/rvv/base/pr115763-1.c: New test. > * gcc.target/riscv/rvv/base/pr115763-2.c: New test. 
> > Signed-off-by: Pan Li <pan2...@intel.com> > --- > gcc/config/riscv/vector.md | 49 +++++++++++++------ > .../gcc.target/riscv/rvv/base/pr115763-1.c | 9 ++++ > .../gcc.target/riscv/rvv/base/pr115763-2.c | 10 ++++ > .../gcc.target/riscv/rvv/base/scalar_move-5.c | 4 +- > .../gcc.target/riscv/rvv/base/scalar_move-6.c | 6 +-- > .../gcc.target/riscv/rvv/base/scalar_move-7.c | 6 +-- > .../gcc.target/riscv/rvv/base/scalar_move-8.c | 6 +-- > 7 files changed, 64 insertions(+), 26 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c > > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index fe18ee5b5f7..d9474262d54 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -2080,31 +2080,50 @@ (define_insn_and_split "*pred_broadcast<mode>" > [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv") > (set_attr "mode" "<MODE>")]) > -(define_insn "*pred_broadcast<mode>" > - [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, > vr, vr, vr, vr, vr, vr, vr") > - (if_then_else:V_VLSF_ZVFHMIN > +(define_insn "*pred_broadcast<mode>_zvfh" > + [(set (match_operand:V_VLSF 0 "register_operand" "=vr, > vr, vr, vr") > + (if_then_else:V_VLSF > (unspec:<VM> > - [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, > vm,Wc1,Wc1,Wb1,Wb1") > - (match_operand 4 "vector_length_operand" " rK, rK, rK, > rK, rK, rK, rK, rK") > - (match_operand 5 "const_int_operand" " i, i, > i, i, i, i, i, i") > - (match_operand 6 "const_int_operand" " i, i, > i, i, i, i, i, i") > - (match_operand 7 "const_int_operand" " i, i, > i, i, i, i, i, i") > + [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1, Wc1, > Wb1, Wb1") > + (match_operand 4 "vector_length_operand" " rK, rK, > rK, rK") > + (match_operand 5 "const_int_operand" " i, i, > i, i") > + (match_operand 6 "const_int_operand" " i, i, > i, i") > + (match_operand 
7 "const_int_operand" " i, i, > i, i") > (reg:SI VL_REGNUM) > (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > - (vec_duplicate:V_VLSF_ZVFHMIN > - (match_operand:<VEL> 3 "direct_broadcast_operand" " f, > f,Wdm,Wdm,Wdm,Wdm, f, f")) > - (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" "vu, 0, > vu, 0, vu, 0, vu, 0")))] > + (vec_duplicate:V_VLSF > + (match_operand:<VEL> 3 "direct_broadcast_operand" " f, f, > f, f")) > + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, > vu, 0")))] > "TARGET_VECTOR" > "@ > vfmv.v.f\t%0,%3 > vfmv.v.f\t%0,%3 > + vfmv.s.f\t%0,%3 > + vfmv.s.f\t%0,%3" > + [(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "*pred_broadcast<mode>_zvfhmin" > + [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" > "=vr, vr, vr, vr") > + (if_then_else:V_VLSF_ZVFHMIN > + (unspec:<VM> > + [(match_operand:<VM> 1 "vector_broadcast_mask_operand" " vm, > vm, Wc1, Wc1") > + (match_operand 4 "vector_length_operand" " rK, > rK, rK, rK") > + (match_operand 5 "const_int_operand" " > i, i, i, i") > + (match_operand 6 "const_int_operand" " > i, i, i, i") > + (match_operand 7 "const_int_operand" " > i, i, i, i") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (vec_duplicate:V_VLSF_ZVFHMIN > + (match_operand:<VEL> 3 "direct_broadcast_operand" "Wdm, > Wdm, Wdm, Wdm")) > + (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " > vu, 0, vu, 0")))] > + "TARGET_VECTOR" > + "@ > vlse<sew>.v\t%0,%3,zero,%1.t > vlse<sew>.v\t%0,%3,zero,%1.t > vlse<sew>.v\t%0,%3,zero > - vlse<sew>.v\t%0,%3,zero > - vfmv.s.f\t%0,%3 > - vfmv.s.f\t%0,%3" > - [(set_attr "type" "vfmov,vfmov,vlds,vlds,vlds,vlds,vfmovfv,vfmovfv") > + vlse<sew>.v\t%0,%3,zero" > + [(set_attr "type" "vlds,vlds,vlds,vlds") > (set_attr "mode" "<MODE>")]) > (define_insn "*pred_broadcast<mode>_extended_scalar" > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c > new file 
mode 100644 > index 00000000000..3b0b0046041 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-1.c > @@ -0,0 +1,9 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zfh_zvfh -mabi=lp64d -O3 -ftree-vectorize > -fno-vect-cost-model" } */ > + > +void test (_Float16 *dest, _Float16 bias) { > + dest[0] = bias; > + dest[1] = bias; > +} > + > +/* { dg-final { scan-assembler-times {vfmv\.v\.f\s+v[0-9]+,\s*fa[0-9]+} 1 > } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c > new file mode 100644 > index 00000000000..f4d53e72022 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr115763-2.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv_zfh_zvfhmin -mabi=lp64d -O3 > -ftree-vectorize -fno-vect-cost-model" } */ > + > +void test (_Float16 *dest, _Float16 bias) { > + dest[0] = bias; > + dest[1] = bias; > +} > + > +/* { dg-final { scan-assembler-times {fsh\s+fa[0-9]+,[0-9]+\(sp\)} 1 } } > */ > +/* { dg-final { scan-assembler-not {vfmv\.v\.x\s+v[0-9]+,\s*fa[0-9]+} } } > */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c > index 2e897a4896f..04dec7bc8dc 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-5.c > @@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl) > /* > ** foo2: > -** addi\t[a-x0-9]+,\s*[a-x0-9]+,100 > +** fld\tfa[0-9]+,\s*100\(a0\) > ** vsetvli\tzero,a2,e64,m2,t[au],m[au] > -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero > +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+ > ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\) > ** ret > */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c > index 326cfd8e2ff..0ebb92eda42 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c > +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-6.c > @@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl) > /* > ** foo2: > -** addi\t[a-x0-9]+,\s*[a-x0-9]+,100 > +** fld\tfa[0-9]+,\s*100\(a0\) > ** vsetvli\tzero,a2,e64,m2,t[au],m[au] > -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero > +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+ > ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\) > ** ret > */ > @@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl) > /* > ** foo4: > ** ... > -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero > +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+ > ** ... > ** ret > */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c > index b218f2d0ba4..512fa62858a 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-7.c > @@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl) > /* > ** foo2: > -** addi\t[a-x0-9]+,\s*[a-x0-9]+,100 > +** fld\tfa[0-9]+,\s*100\(a0\) > ** vsetvli\tzero,a2,e64,m2,t[au],m[au] > -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero > +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+ > ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\) > ** ret > */ > @@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl) > /* > ** foo4: > ** ... > -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero > +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+ > ** ... 
> ** ret > */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c > index 4438e793dbc..d9d10f3702a 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/scalar_move-8.c > @@ -21,9 +21,9 @@ void foo (void *base, void *out, size_t vl) > /* > ** foo2: > -** addi\t[a-x0-9]+,\s*[a-x0-9]+,100 > +** fld\tfa[0-9]+,\s*100\(a0\) > ** vsetvli\tzero,a2,e64,m2,t[au],m[au] > -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero > +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+ > ** vs2r.v\tv[0-9]+,0\([a-x0-9]+\) > ** ret > */ > @@ -52,7 +52,7 @@ void foo3 (void *base, void *out, size_t vl) > /* > ** foo4: > ** ... > -** vlse64.v\tv[0-9]+,0\([a-x0-9]+\),zero > +** vfmv\.v\.f\tv[0-9]+,\s*fa[0-9]+ > ** ... > ** ret > */ > -- > 2.34.1 > > > >