Hi,
This simple patch fixes the ICE by adding LTGT handling to the
vec_cmp<mode><v_cmp_result> pattern.  I also turned the original test case
into a compile-only one, since -fno-trapping-math should not be used in
general.
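(For reference, LTGT is the exact inverse of UNEQ, so the existing UNEQ
sequence, (a > b) | (b > a) followed by a one's-complement, already computes
LTGT up to that final inversion, which is all the patch relies on.  The small
scalar sketch below only illustrates that identity; the ltgt/uneq helper
names are made up for the example, and it ignores FP exception semantics,
unlike the vector code.)

/* Illustration only: LTGT (islessgreater) is !UNEQ, so the UNEQ emit
   sequence can be reused for LTGT by skipping the final inversion.  */
#include <stdio.h>

static int ltgt (double a, double b) { return a > b || b > a; }
static int uneq (double a, double b) { return !ltgt (a, b); }

int
main (void)
{
  double qnan = __builtin_nan ("");
  printf ("%d %d\n", ltgt (1.0, 2.0), uneq (1.0, 2.0));    /* 1 0 */
  printf ("%d %d\n", ltgt (1.0, 1.0), uneq (1.0, 1.0));    /* 0 1 */
  printf ("%d %d\n", ltgt (1.0, qnan), uneq (1.0, qnan));  /* 0 1 */
  return 0;
}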
Bootstrapped and tested on AArch64; test results checked for x86_64.  Is it
OK?  I would also need to backport it to the gcc-7-branch.
Thanks,
bin
gcc/ChangeLog
2017-07-27  Bin Cheng  <bin.ch...@arm.com>

	PR target/81228
	* config/aarch64/aarch64-simd.md (vec_cmp<mode><v_cmp_result>): Add
	LTGT.

gcc/testsuite/ChangeLog
2017-07-27  Bin Cheng  <bin.ch...@arm.com>

	PR target/81228
	* gcc.dg/pr81228.c: New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 011fcec0..9cd67a2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2524,6 +2524,7 @@
     case EQ:
       comparison = gen_aarch64_cmeq<mode>;
       break;
+    case LTGT:
     case UNEQ:
     case ORDERED:
     case UNORDERED:
@@ -2571,6 +2572,7 @@
       emit_insn (comparison (operands[0], operands[2], operands[3]));
       break;
 
+    case LTGT:
     case UNEQ:
       /* We first check (a > b || b > a) which is !UNEQ, inverting
	 this result will then give us (a == b || a UNORDERED b).  */
@@ -2578,7 +2580,8 @@
					 operands[2], operands[3]));
       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
       emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
-      emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
+      if (code == UNEQ)
+	emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
       break;
 
     case UNORDERED:
diff --git a/gcc/testsuite/gcc.dg/pr81228.c b/gcc/testsuite/gcc.dg/pr81228.c
new file mode 100644
index 0000000..3334299
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr81228.c
@@ -0,0 +1,47 @@
+/* PR target/81228 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-trapping-math" } */
+/* { dg-options "-O3 -fno-trapping-math -mavx" { target avx_runtime } } */
+
+double s1[4], s2[4], s3[64];
+
+int
+main (void)
+{
+  int i;
+  asm volatile ("" : : : "memory");
+  for (i = 0; i < 4; i++)
+    s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[13 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[15 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+  asm volatile ("" : : : "memory");
+  return 0;
+}