LGTM, but I am deferring this to the GCC 14 queue.
On Wed, Mar 22, 2023 at 8:16 PM <juzhe.zh...@rivai.ai> wrote: > > From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai> > > Current expansion of vmsge will make RA produce redundant vmv1r.v. > > testcase: > void f1 (void * in, void *out, int32_t x) > { > vbool32_t mask = *(vbool32_t*)in; > asm volatile ("":::"memory"); > vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4); > vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4); > vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4); > vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4); > m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4); > __riscv_vsm_v_b32 (out, m4, 4); > } > > Before this patch: > f1: > vsetvli a5,zero,e8,mf4,ta,ma > vlm.v v0,0(a0) > vsetivli zero,4,e32,m1,ta,mu > vle32.v v3,0(a0) > vle32.v v2,0(a0),v0.t > vmslt.vx v1,v3,a2 > vmnot.m v1,v1 > vmslt.vx v1,v3,a2,v0.t > vmxor.mm v1,v1,v0 > vmv1r.v v0,v1 > vmsge.vv v2,v2,v2,v0.t > vsm.v v2,0(a1) > ret > > After this patch: > f1: > vsetvli a5,zero,e8,mf4,ta,ma > vlm.v v0,0(a0) > vsetivli zero,4,e32,m1,ta,mu > vle32.v v3,0(a0) > vle32.v v2,0(a0),v0.t > vmslt.vx v1,v3,a2 > vmnot.m v1,v1 > vmslt.vx v1,v3,a2,v0.t > vmxor.mm v0,v1,v0 > vmsge.vv v2,v2,v2,v0.t > vsm.v v2,0(a1) > ret > > > gcc/ChangeLog: > > * config/riscv/vector.md: Fix redundant vmv1r.v. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly > check. 
> > --- > gcc/config/riscv/vector.md | 15 +++++++-------- > .../riscv/rvv/base/binop_vx_constraint-150.c | 2 +- > 2 files changed, 8 insertions(+), 9 deletions(-) > > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index ebb014aecb1..f06d68be80f 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -4111,6 +4111,7 @@ > { > enum rtx_code code = GET_CODE (operands[3]); > rtx undef = RVV_VUNDEF (<VM>mode); > + rtx tmp = gen_reg_rtx (<VM>mode); > if (code == GEU && rtx_equal_p (operands[5], const0_rtx)) > { > /* If vmsgeu with 0 immediate, expand it to vmset. */ > @@ -4157,12 +4158,11 @@ > - pseudoinstruction: vmsge{u}.vx vd, va, x > - expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd. */ > emit_insn ( > - gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2], > + gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2], > operands[3], operands[4], operands[5], > operands[6], operands[7], > operands[8])); > emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode), > - undef, operands[0], operands[0], > - operands[6], operands[8])); > + undef, tmp, tmp, operands[6], > operands[8])); > } > else > { > @@ -4171,13 +4171,12 @@ > /* masked va >= x, vd == v0 > - pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt > - expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt. */ > - rtx reg = gen_reg_rtx (<VM>mode); > emit_insn (gen_pred_cmp<mode>_scalar ( > - reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4], > + tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4], > operands[5], operands[6], operands[7], operands[8])); > emit_insn ( > gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), > undef, > - operands[1], reg, operands[6], > operands[8])); > + operands[1], tmp, operands[6], > operands[8])); > } > else > { > @@ -4186,10 +4185,10 @@ > - expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0. 
> */ > emit_insn (gen_pred_cmp<mode>_scalar ( > - operands[0], operands[1], operands[2], operands[3], > operands[4], > + tmp, operands[1], operands[2], operands[3], operands[4], > operands[5], operands[6], operands[7], operands[8])); > emit_insn (gen_pred (XOR, <VM>mode, operands[0], > - CONSTM1_RTX (<VM>mode), undef, operands[0], > + CONSTM1_RTX (<VM>mode), undef, tmp, > operands[1], operands[6], operands[8])); > } > } > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c > index 55a222f47ea..e92a8115f09 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c > @@ -18,4 +18,4 @@ void f1 (void * in, void *out, int32_t x) > /* { dg-final { scan-assembler-times > {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */ > /* { dg-final { scan-assembler-times > {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */ > /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */ > -/* { dg-final { scan-assembler-times {vmv} 1 } } */ > +/* { dg-final { scan-assembler-not {vmv} } } */ > -- > 2.36.1 >