100757)

Richard Sandiford via Gcc-patches Wed, 09 Jun 2021 08:04:24 -0700

Christophe Lyon <christophe.l...@linaro.org> writes:
> The problem in this PR is that we call VPSEL with a mask of vector
> type instead of HImode. This happens because operand 3 in vcond_mask
> is the pre-computed vector comparison and has vector type. The fix is
> to transfer this value to VPR.P0 by comparing operand 3 with a vector
> of constant 1 of the same type as operand 3.


The alternative is to implement TARGET_VECTORIZE_GET_MASK_MODE
and return HImode for MVE.  This is how AVX512 handles masks.

It might be worth trying that to see how it works.  I'm not sure
off-hand whether it'll produce worse code or better code.  However,
using HImode as the mask mode would help when defining other
predicated optabs in future.

Thanks,
Richard

> The pr100757*.c testcases are derived from
> gcc.c-torture/compile/20160205-1.c, forcing the use of MVE, and using
> different types and return values different from 0 and 1 to avoid
> commonalization with boolean masks.
>
> Reducing the number of iterations in pr100757-3.c from 32 to 8, we
> generate the code below:
>
> float a[32];
> float fn1(int d) {
>   int c = 4;
>   for (int b = 0; b < 8; b++)
>     if (a[b] != 2.0f)
>       c = 5;
>   return c;
> }
>
> fn1:
>         ldr     r3, .L4+80
>       vpush.64        {d8, d9}
>       vldrw.32        q3, [r3]        // q3=a[0..3]
>       vldr.64 d8, .L4                 // q4=(2.0,2.0,2.0,2.0)
>       vldr.64 d9, .L4+8
>       adds    r3, r3, #16
>       vcmp.f32        eq, q3, q4      // cmp a[0..3] == (2.0,2.0,2.0,2.0)
>       vldr.64 d2, .L4+16              // q1=(1,1,1,1)
>       vldr.64 d3, .L4+24
>       vldrw.32        q3, [r3]        // q3=a[4..7]
>       vldr.64 d4, .L4+32              // q2=(0,0,0,0)
>       vldr.64 d5, .L4+40
>       vpsel q0, q1, q2                // q0=select (a[0..3])
>       vcmp.f32        eq, q3, q4      // cmp a[4..7] == (2.0,2.0,2.0,2.0)
>       vldm    sp!, {d8-d9}
>       vpsel q2, q1, q2                // q2=select (a[4..7])
>       vand    q2, q0, q2              // q2=select (a[0..3]) && select 
> (a[4..7])
>       vldr.64 d6, .L4+48              // q3=(4.0,4.0,4.0,4.0)
>       vldr.64 d7, .L4+56
>       vldr.64 d0, .L4+64              // q0=(5.0,5.0,5.0,5.0)
>       vldr.64 d1, .L4+72
>       vcmp.i32  eq, q2, q1            // cmp mask(a[0..7]) == (1,1,1,1)
>       vpsel q3, q3, q0                // q3= vcond_mask(4.0,5.0)
>       vmov.32 r3, q3[0]               // keep the scalar max
>       vmov.32 r1, q3[1]
>       vmov.32 r0, q3[3]
>       vmov.32 r2, q3[2]
>       vmov    s14, r1
>       vmov    s15, r3
>       vmaxnm.f32      s15, s15, s14
>       vmov    s14, r2
>       vmaxnm.f32      s15, s15, s14
>       vmov    s14, r0
>       vmaxnm.f32      s15, s15, s14
>       vmov    r0, s15
>       bx      lr
>       .L5:
>       .align  3
>       .L4:
>       .word   1073741824
>       .word   1073741824
>       .word   1073741824
>       .word   1073741824
>       .word   1
>       .word   1
>       .word   1
>       .word   1
>       .word   0
>       .word   0
>       .word   0
>       .word   0
>       .word   1082130432
>       .word   1082130432
>       .word   1082130432
>       .word   1082130432
>       .word   1084227584
>       .word   1084227584
>       .word   1084227584
>       .word   1084227584
>
> 2021-06-09  Christophe Lyon  <christophe.l...@linaro.org>
>
>       PR target/100757
>       gcc/
>       * config/arm/vec-common.md (vcond_mask_<mode><v_cmp_result>): Fix
>       expansion for MVE.
>
>       gcc/testsuite/
>       * gcc.target/arm/simd/pr100757.c: New test.
>       * gcc.target/arm/simd/pr100757-2.c: New test.
>       * gcc.target/arm/simd/pr100757-3.c: New test.
> ---
>  gcc/config/arm/vec-common.md                  | 24 +++++++++++++++++--
>  .../gcc.target/arm/simd/pr100757-2.c          | 20 ++++++++++++++++
>  .../gcc.target/arm/simd/pr100757-3.c          | 20 ++++++++++++++++
>  gcc/testsuite/gcc.target/arm/simd/pr100757.c  | 19 +++++++++++++++
>  4 files changed, 81 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757-2.c
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757-3.c
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757.c
>
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> index 0ffc7a9322c..ccdfaa8321f 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -478,8 +478,28 @@ (define_expand "vcond_mask_<mode><v_cmp_result>"
>      }
>    else if (TARGET_HAVE_MVE)
>      {
> -      emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
> -                                 operands[1], operands[2], operands[3]));
> +      /* Convert pre-computed vector comparison into VPR.P0 by comparing
> +         operand 3 with a vector of '1', then use VPSEL.  */
> +      machine_mode cmp_mode = GET_MODE (operands[3]);
> +      rtx vpr_p0 = gen_reg_rtx (HImode);
> +      rtx one = gen_reg_rtx (cmp_mode);
> +      emit_move_insn (one, CONST1_RTX (cmp_mode));
> +      emit_insn (gen_mve_vcmpq (EQ, cmp_mode, vpr_p0, operands[3], one));
> +
> +      switch (GET_MODE_CLASS (<MODE>mode))
> +        {
> +          case MODE_VECTOR_INT:
> +            emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], 
> operands[1], operands[2], vpr_p0));
> +            break;
> +          case MODE_VECTOR_FLOAT:
> +         if (TARGET_HAVE_MVE_FLOAT)
> +              emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0], 
> operands[1], operands[2], vpr_p0));
> +         else
> +           gcc_unreachable ();
> +            break;
> +          default:
> +            gcc_unreachable ();
> +         }
>      }
>    else
>      gcc_unreachable ();
> diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c 
> b/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c
> new file mode 100644
> index 00000000000..993ce369090
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
> +/* Derived from gcc.c-torture/compile/20160205-1.c.  */
> +
> +float a[32];
> +int fn1(int d) {
> +  int c = 4;
> +  for (int b = 0; b < 32; b++)
> +    if (a[b] != 2.0f)
> +      c = 5;
> +  return c;
> +}
> +
> +/* { dg-final { scan-assembler-times {\t.word\t1073741824\n} 4 } } */ /* 
> Constant 2.0f.  */
> +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. 
>  */
> +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' 
> mask.  */
> +/* { dg-final { scan-assembler-times {\t.word\t4\n} 4 } } */ /* Initial 
> value for c.  */
> +/* { dg-final { scan-assembler-times {\t.word\t5\n} 4 } } */ /* Possible 
> value for c.  */
> diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c 
> b/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c
> new file mode 100644
> index 00000000000..b94a73b2d2c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
> +/* { dg-add-options arm_v8_1m_mve_fp } */
> +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
> +/* Copied from gcc.c-torture/compile/20160205-1.c.  */
> +
> +float a[32];
> +float fn1(int d) {
> +  float c = 4;
> +  for (int b = 0; b < 32; b++)
> +    if (a[b] != 2.0f)
> +      c = 5;
> +  return c;
> +}
> +
> +/* { dg-final { scan-assembler-times {\t.word\t1073741824\n} 4 } } */ /* 
> Constant 2.0f.  */
> +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. 
>  */
> +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' 
> mask.  */
> +/* { dg-final { scan-assembler-times {\t.word\t1084227584\n} 4 } } */ /* 
> Initial value for c.  */
> +/* { dg-final { scan-assembler-times {\t.word\t1082130432\n} 4 } } */ /* 
> Possible value for c.  */
> diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757.c 
> b/gcc/testsuite/gcc.target/arm/simd/pr100757.c
> new file mode 100644
> index 00000000000..e51e716b4ec
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O3" } */
> +/* Derived from gcc.c-torture/compile/20160205-1.c.  */
> +
> +int a[32];
> +int fn1(int d) {
> +  int c = 2;
> +  for (int b = 0; b < 32; b++)
> +    if (a[b])
> +      c = 3;
> +  return c;
> +}
> +
> +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. 
>  */
> +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' 
> mask.  */
> +/* { dg-final { scan-assembler-times {\t.word\t2\n} 4 } } */ /* Initial 
> value for c.  */
> +/* { dg-final { scan-assembler-times {\t.word\t3\n} 4 } } */ /* Possible 
> value for c.  */

Re: [PATCH 1/2] arm: Fix vcond_mask expander for MVE (PR target/100757)

Reply via email to