OK for trunk and GCC 14. juzhe.zh...@rivai.ai <juzhe.zh...@rivai.ai> 於 2024年7月3日 週三 17:43 寫道:
> LGTM > > ------------------------------ > juzhe.zh...@rivai.ai > > > *From:* Robin Dapp <rdapp....@gmail.com> > *Date:* 2024-07-03 17:39 > *To:* gcc-patches <gcc-patches@gcc.gnu.org> > *CC:* rdapp.gcc <rdapp....@gmail.com>; palmer <pal...@dabbelt.com>; Kito > Cheng <kito.ch...@gmail.com>; juzhe.zh...@rivai.ai; jeffreyalaw > <jeffreya...@gmail.com>; Li, Pan2 <pan2...@intel.com> > *Subject:* [PATCH] RISC-V: Use tu policy for first-element vec_set > [PR115725]. > Hi, > > this patch changes the tail policy for vmv.s.x from ta to tu. > By default the bug does not show up with qemu because qemu's > current vmv.s.x implementation always uses the tail-undisturbed > policy. With a local qemu version that overwrites the tail > with ones when the tail-agnostic policy is specified, the bug > shows. > > Regtested on rv64gcv_zvfh. > > OK for trunk and GCC 14 backport? > > Regards > Robin > > gcc/ChangeLog: > > * config/riscv/autovec.md: Add TU policy. > * config/riscv/riscv-protos.h (enum insn_type): Define > SCALAR_MOVE_MERGED_OP_TU. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c: Adjust > test expectation. > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c: Ditto. > * gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c: Ditto. 
> --- > gcc/config/riscv/autovec.md | 3 ++- > gcc/config/riscv/riscv-protos.h | 4 ++++ > .../riscv/rvv/autovec/vls-vlmax/vec_set-1.c | 12 ++++-------- > .../riscv/rvv/autovec/vls-vlmax/vec_set-2.c | 12 ++++-------- > .../riscv/rvv/autovec/vls-vlmax/vec_set-3.c | 12 ++++-------- > .../riscv/rvv/autovec/vls-vlmax/vec_set-4.c | 12 ++++-------- > 6 files changed, 22 insertions(+), 33 deletions(-) > > diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md > index 1af50a46c4c..aa7dd526804 100644 > --- a/gcc/config/riscv/autovec.md > +++ b/gcc/config/riscv/autovec.md > @@ -1341,7 +1341,8 @@ (define_expand "vec_set<mode>" > { > rtx ops[] = {operands[0], operands[0], operands[1]}; > riscv_vector::emit_nonvlmax_insn (code_for_pred_broadcast > (<MODE>mode), > - riscv_vector::SCALAR_MOVE_MERGED_OP, ops, CONST1_RTX (Pmode)); > + riscv_vector::SCALAR_MOVE_MERGED_OP_TU, > + ops, CONST1_RTX (Pmode)); > } > else > { > diff --git a/gcc/config/riscv/riscv-protos.h > b/gcc/config/riscv/riscv-protos.h > index 39b723a590b..064aa082742 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -524,6 +524,10 @@ enum insn_type : unsigned int > SCALAR_MOVE_MERGED_OP = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P > | HAS_MERGE_P | TDEFAULT_POLICY_P | MDEFAULT_POLICY_P > | UNARY_OP_P, > + > + SCALAR_MOVE_MERGED_OP_TU = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P > + | HAS_MERGE_P | TU_POLICY_P | MDEFAULT_POLICY_P > + | UNARY_OP_P, > }; > enum vlmul_type > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c > index ecb160933d6..99b0f625c83 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-1.c > @@ -64,14 +64,10 @@ typedef double vnx2df __attribute__((vector_size > (16))); > TEST_ALL1 (VEC_SET) > TEST_ALL_VAR1 (VEC_SET_VAR1) > -/* { dg-final { 
scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*ta,\s*ma} 1 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*tu,\s*ma} 5 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*tu,\s*ma} 6 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*tu,\s*ma} 6 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*tu,\s*ma} 4 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m1,\s*tu,\s*ma} 6 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m1,\s*tu,\s*ma} 8 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m1,\s*tu,\s*ma} 8 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m1,\s*tu,\s*ma} 6 } } */ > /* { dg-final { scan-assembler-times {\tvmv.v.x} 13 } } */ > /* { dg-final { scan-assembler-times {\tvfmv.v.f} 8 } } */ > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c > index 194abff77cc..64a40308eb1 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-2.c > @@ -76,14 +76,10 @@ typedef double vnx4df __attribute__((vector_size > (32))); > TEST_ALL2 (VEC_SET) > TEST_ALL_VAR2 (VEC_SET_VAR2) > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m2,\s*ta,\s*ma} 1 } } */ > -/* { dg-final { scan-assembler-times > 
{vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m2,\s*tu,\s*ma} 5 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m2,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m2,\s*tu,\s*ma} 10 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m2,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m2,\s*tu,\s*ma} 10 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m2,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m2,\s*tu,\s*ma} 8 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m2,\s*tu,\s*ma} 6 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m2,\s*tu,\s*ma} 12 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m2,\s*tu,\s*ma} 12 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m2,\s*tu,\s*ma} 10 } } */ > /* { dg-final { scan-assembler-times {\tvmv.v.x} 19 } } */ > /* { dg-final { scan-assembler-times {\tvfmv.v.f} 14 } } */ > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c > index 9495d5163d4..ce029b35360 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-3.c > @@ -77,14 +77,10 @@ typedef double vnx8df __attribute__((vector_size > (64))); > TEST_ALL3 (VEC_SET) > TEST_ALL_VAR3 (VEC_SET_VAR3) > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m4,\s*ta,\s*ma} 1 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m4,\s*tu,\s*ma} 5 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m4,\s*ta,\s*ma} 2 } } */ > 
-/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m4,\s*tu,\s*ma} 11 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m4,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m4,\s*tu,\s*ma} 10 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m4,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m4,\s*tu,\s*ma} 8 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m4,\s*tu,\s*ma} 6 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m4,\s*tu,\s*ma} 13 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m4,\s*tu,\s*ma} 12 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m4,\s*tu,\s*ma} 10 } } */ > /* { dg-final { scan-assembler-times {\tvmv.v.x} 19 } } */ > /* { dg-final { scan-assembler-times {\tvfmv.v.f} 15 } } */ > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c > index 9992bc24c34..309256d33fa 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/vec_set-4.c > @@ -80,14 +80,10 @@ typedef double vnx16df __attribute__((vector_size > (128))); > TEST_ALL4 (VEC_SET) > TEST_ALL_VAR4 (VEC_SET_VAR4) > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m8,\s*ta,\s*ma} 1 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m8,\s*tu,\s*ma} 6 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m8,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m8,\s*tu,\s*ma} 13 } } */ > -/* { dg-final { scan-assembler-times > 
{vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m8,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m8,\s*tu,\s*ma} 10 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m8,\s*ta,\s*ma} 2 } } */ > -/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m8,\s*tu,\s*ma} 8 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e8,\s*m8,\s*tu,\s*ma} 7 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e16,\s*m8,\s*tu,\s*ma} 15 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e32,\s*m8,\s*tu,\s*ma} 12 } } */ > +/* { dg-final { scan-assembler-times > {vset[i]*vli\s+[a-z0-9,]+,\s*e64,\s*m8,\s*tu,\s*ma} 10 } } */ > /* { dg-final { scan-assembler-times {\tvmv.v.x} 20 } } */ > /* { dg-final { scan-assembler-times {\tvfmv.v.f} 17 } } */ > -- > 2.45.2 > > >