The problem in this PR is that we call VPSEL with a mask of vector type instead of HImode. This happens because operand 3 in vcond_mask is the pre-computed vector comparison and has vector type. The fix is to transfer this value to VPR.P0 by comparing operand 3 with a vector of constant 1 of the same type as operand 3.
The pr100757*.c testcases are derived from gcc.c-torture/compile/20160205-1.c, forcing the use of MVE, and using different types and return values different from 0 and 1 to avoid commonalization with boolean masks. Reducing the number of iterations in pr100757-3.c from 32 to 8, we generate the code below: float a[32]; float fn1(int d) { int c = 4; for (int b = 0; b < 8; b++) if (a[b] != 2.0f) c = 5; return c; } fn1: ldr r3, .L4+80 vpush.64 {d8, d9} vldrw.32 q3, [r3] // q3=a[0..3] vldr.64 d8, .L4 // q4=(2.0,2.0,2.0,2.0) vldr.64 d9, .L4+8 adds r3, r3, #16 vcmp.f32 eq, q3, q4 // cmp a[0..3] == (2.0,2.0,2.0,2.0) vldr.64 d2, .L4+16 // q1=(1,1,1,1) vldr.64 d3, .L4+24 vldrw.32 q3, [r3] // q3=a[4..7] vldr.64 d4, .L4+32 // q2=(0,0,0,0) vldr.64 d5, .L4+40 vpsel q0, q1, q2 // q0=select (a[0..3]) vcmp.f32 eq, q3, q4 // cmp a[4..7] == (2.0,2.0,2.0,2.0) vldm sp!, {d8-d9} vpsel q2, q1, q2 // q2=select (a[4..7]) vand q2, q0, q2 // q2=select (a[0..3]) && select (a[4..7]) vldr.64 d6, .L4+48 // q3=(4.0,4.0,4.0,4.0) vldr.64 d7, .L4+56 vldr.64 d0, .L4+64 // q0=(5.0,5.0,5.0,5.0) vldr.64 d1, .L4+72 vcmp.i32 eq, q2, q1 // cmp mask(a[0..7]) == (1,1,1,1) vpsel q3, q3, q0 // q3= vcond_mask(4.0,5.0) vmov.32 r3, q3[0] // keep the scalar max vmov.32 r1, q3[1] vmov.32 r0, q3[3] vmov.32 r2, q3[2] vmov s14, r1 vmov s15, r3 vmaxnm.f32 s15, s15, s14 vmov s14, r2 vmaxnm.f32 s15, s15, s14 vmov s14, r0 vmaxnm.f32 s15, s15, s14 vmov r0, s15 bx lr .L5: .align 3 .L4: .word 1073741824 .word 1073741824 .word 1073741824 .word 1073741824 .word 1 .word 1 .word 1 .word 1 .word 0 .word 0 .word 0 .word 0 .word 1082130432 .word 1082130432 .word 1082130432 .word 1082130432 .word 1084227584 .word 1084227584 .word 1084227584 .word 1084227584 2021-06-09 Christophe Lyon <christophe.l...@linaro.org> PR target/100757 gcc/ * config/arm/vec-common.md (vcond_mask_<mode><v_cmp_result>): Fix expansion for MVE. gcc/testsuite/ * gcc.target/arm/simd/pr100757.c: New test. * gcc.target/arm/simd/pr100757-2.c: New test. * gcc.target/arm/simd/pr100757-3.c: New test. --- gcc/config/arm/vec-common.md | 24 +++++++++++++++++-- .../gcc.target/arm/simd/pr100757-2.c | 20 ++++++++++++++++ .../gcc.target/arm/simd/pr100757-3.c | 20 ++++++++++++++++ gcc/testsuite/gcc.target/arm/simd/pr100757.c | 19 +++++++++++++++ 4 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757-2.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757-3.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/pr100757.c diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 0ffc7a9322c..ccdfaa8321f 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -478,8 +478,28 @@ (define_expand "vcond_mask_<mode><v_cmp_result>" } else if (TARGET_HAVE_MVE) { - emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], - operands[1], operands[2], operands[3])); + /* Convert pre-computed vector comparison into VPR.P0 by comparing + operand 3 with a vector of '1', then use VPSEL. */ + machine_mode cmp_mode = GET_MODE (operands[3]); + rtx vpr_p0 = gen_reg_rtx (HImode); + rtx one = gen_reg_rtx (cmp_mode); + emit_move_insn (one, CONST1_RTX (cmp_mode)); + emit_insn (gen_mve_vcmpq (EQ, cmp_mode, vpr_p0, operands[3], one)); + + switch (GET_MODE_CLASS (<MODE>mode)) + { + case MODE_VECTOR_INT: + emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], operands[1], operands[2], vpr_p0)); + break; + case MODE_VECTOR_FLOAT: + if (TARGET_HAVE_MVE_FLOAT) + emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0], operands[1], operands[2], vpr_p0)); + else + gcc_unreachable (); + break; + default: + gcc_unreachable (); + } } else gcc_unreachable (); diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c b/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c new file mode 100644 index 00000000000..993ce369090 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ +/* Derived from gcc.c-torture/compile/20160205-1.c. */ + +float a[32]; +int fn1(int d) { + int c = 4; + for (int b = 0; b < 32; b++) + if (a[b] != 2.0f) + c = 5; + return c; +} + +/* { dg-final { scan-assembler-times {\t.word\t1073741824\n} 4 } } */ /* Constant 2.0f. */ +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. */ +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' mask. */ +/* { dg-final { scan-assembler-times {\t.word\t4\n} 4 } } */ /* Initial value for c. */ +/* { dg-final { scan-assembler-times {\t.word\t5\n} 4 } } */ /* Possible value for c. */ diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c b/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c new file mode 100644 index 00000000000..b94a73b2d2c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ +/* Copied from gcc.c-torture/compile/20160205-1.c. */ + +float a[32]; +float fn1(int d) { + float c = 4; + for (int b = 0; b < 32; b++) + if (a[b] != 2.0f) + c = 5; + return c; +} + +/* { dg-final { scan-assembler-times {\t.word\t1073741824\n} 4 } } */ /* Constant 2.0f. */ +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. */ +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' mask. */ +/* { dg-final { scan-assembler-times {\t.word\t1084227584\n} 4 } } */ /* Initial value for c. */ +/* { dg-final { scan-assembler-times {\t.word\t1082130432\n} 4 } } */ /* Possible value for c. */ diff --git a/gcc/testsuite/gcc.target/arm/simd/pr100757.c b/gcc/testsuite/gcc.target/arm/simd/pr100757.c new file mode 100644 index 00000000000..e51e716b4ec --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/pr100757.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ +/* Derived from gcc.c-torture/compile/20160205-1.c. */ + +int a[32]; +int fn1(int d) { + int c = 2; + for (int b = 0; b < 32; b++) + if (a[b]) + c = 3; + return c; +} + +/* { dg-final { scan-assembler-times {\t.word\t1\n} 4 } } */ /* 'true' mask. */ +/* { dg-final { scan-assembler-times {\t.word\t0\n} 4 } } */ /* 'false' mask. */ +/* { dg-final { scan-assembler-times {\t.word\t2\n} 4 } } */ /* Initial value for c. */ +/* { dg-final { scan-assembler-times {\t.word\t3\n} 4 } } */ /* Possible value for c. */ -- 2.25.1