LGTM, but let's defer this to the GCC 14 queue.

On Wed, Mar 22, 2023 at 8:16 PM <juzhe.zh...@rivai.ai> wrote:
>
> From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai>
>
> The current expansion of vmsge makes the register allocator (RA) emit a redundant vmv1r.v.
>
> Testcase:
> void f1 (void * in, void *out, int32_t x)
> {
>     vbool32_t mask = *(vbool32_t*)in;
>     asm volatile ("":::"memory");
>     vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
>     vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
>     vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4);
>     vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4);
>     m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4);
>     __riscv_vsm_v_b32 (out, m4, 4);
> }
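
(In case anyone wants to reproduce the assembly below: with riscv_vector.h
and stdint.h included, something like

  riscv64-unknown-elf-gcc -march=rv64gcv -mabi=lp64d -O3 -S f1.c

should show comparable sequences; the options here are my guess and are not
taken from the testcase's dg-options.)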
>
> Before this patch:
> f1:
>         vsetvli a5,zero,e8,mf4,ta,ma
>         vlm.v   v0,0(a0)
>         vsetivli        zero,4,e32,m1,ta,mu
>         vle32.v v3,0(a0)
>         vle32.v v2,0(a0),v0.t
>         vmslt.vx        v1,v3,a2
>         vmnot.m v1,v1
>         vmslt.vx        v1,v3,a2,v0.t
>         vmxor.mm        v1,v1,v0
>         vmv1r.v v0,v1
>         vmsge.vv        v2,v2,v2,v0.t
>         vsm.v   v2,0(a1)
>         ret
>
> After this patch:
> f1:
>         vsetvli a5,zero,e8,mf4,ta,ma
>         vlm.v   v0,0(a0)
>         vsetivli        zero,4,e32,m1,ta,mu
>         vle32.v v3,0(a0)
>         vle32.v v2,0(a0),v0.t
>         vmslt.vx        v1,v3,a2
>         vmnot.m v1,v1
>         vmslt.vx        v1,v3,a2,v0.t
>         vmxor.mm        v0,v1,v0
>         vmsge.vv        v2,v2,v2,v0.t
>         vsm.v   v2,0(a1)
>         ret
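
If I follow the change correctly: in the masked vd != v0 case, operands[0]
used to be defined by the intermediate vmslt while the original mask (which
has to live in v0) was still needed as an input to the final vmxor, so
operands[0] could never be allocated to v0 and the following masked use
required a vmv1r.v copy. With the intermediate routed through a fresh
pseudo, operands[0] is only written by the final vmxor, where the old mask's
last use is, so RA can put the result straight into v0. A rough sketch of
that case (operand lists elided; the real hunk below has the full argument
lists, so this is not the literal pattern):

      /* Before (simplified): intermediate and final result share operands[0].  */
      emit_insn (gen_pred_cmp<mode>_scalar (operands[0], operands[1], ...));
      emit_insn (gen_pred (XOR, <VM>mode, operands[0], ..., operands[0],
                           operands[1], ...));

      /* After (simplified): the intermediate lives in its own pseudo.  */
      rtx tmp = gen_reg_rtx (<VM>mode);
      emit_insn (gen_pred_cmp<mode>_scalar (tmp, operands[1], ...));
      emit_insn (gen_pred (XOR, <VM>mode, operands[0], ..., tmp,
                           operands[1], ...));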
>
>
> gcc/ChangeLog:
>
>         * config/riscv/vector.md: Fix redundant vmv1r.v.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt
>         assembly check.
>
> ---
>  gcc/config/riscv/vector.md                        | 15 +++++++--------
>  .../riscv/rvv/base/binop_vx_constraint-150.c      |  2 +-
>  2 files changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index ebb014aecb1..f06d68be80f 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -4111,6 +4111,7 @@
>  {
>    enum rtx_code code = GET_CODE (operands[3]);
>    rtx undef = RVV_VUNDEF (<VM>mode);
> +  rtx tmp = gen_reg_rtx (<VM>mode);
>    if (code == GEU && rtx_equal_p (operands[5], const0_rtx))
>      {
>        /* If vmsgeu with 0 immediate, expand it to vmset.  */
> @@ -4157,12 +4158,11 @@
>             - pseudoinstruction: vmsge{u}.vx vd, va, x
>             - expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd.  */
>           emit_insn (
> -           gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2],
> +           gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2],
>                                         operands[3], operands[4], operands[5],
>                                        operands[6], operands[7], operands[8]));
>           emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode),
> -                                       undef, operands[0], operands[0],
> -                                       operands[6], operands[8]));
> +                                       undef, tmp, tmp, operands[6], operands[8]));
>         }
>        else
>         {
> @@ -4171,13 +4171,12 @@
>               /* masked va >= x, vd == v0
>                 - pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
>                 - expansion: vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt.  */
> -             rtx reg = gen_reg_rtx (<VM>mode);
>               emit_insn (gen_pred_cmp<mode>_scalar (
> -               reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
> +               tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4],
>                 operands[5], operands[6], operands[7], operands[8]));
>               emit_insn (
>                 gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), undef,
> -                                  operands[1], reg, operands[6], operands[8]));
> +                                  operands[1], tmp, operands[6], operands[8]));
>             }
>           else
>             {
> @@ -4186,10 +4185,10 @@
>                 - expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0.
>               */
>               emit_insn (gen_pred_cmp<mode>_scalar (
> -               operands[0], operands[1], operands[2], operands[3], operands[4],
> +               tmp, operands[1], operands[2], operands[3], operands[4],
>                 operands[5], operands[6], operands[7], operands[8]));
>               emit_insn (gen_pred (XOR, <VM>mode, operands[0],
> -                                  CONSTM1_RTX (<VM>mode), undef, operands[0],
> +                                  CONSTM1_RTX (<VM>mode), undef, tmp,
>                                    operands[1], operands[6], operands[8]));
>             }
>         }
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> index 55a222f47ea..e92a8115f09 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c
> @@ -18,4 +18,4 @@ void f1 (void * in, void *out, int32_t x)
>  /* { dg-final { scan-assembler-times {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */
>  /* { dg-final { scan-assembler-times {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
>  /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */
> -/* { dg-final { scan-assembler-times {vmv} 1 } } */
> +/* { dg-final { scan-assembler-not {vmv} } } */
> --
> 2.36.1
>
