Hello, This patch adds a missing bitwise NOT to the SVE2 BCAX (Bitwise Clear and Exclusive OR) pattern, fixing PR target/97730. Since SVE doesn't have an unpredicated NOT instruction, we need to use a (vacuously) predicated NOT here.
To ensure that the predicate is instantiated correctly (to all 1s) for the intrinsics, we pull out a separate expander from the define_insn. From the ISA reference [1]: > Bitwise AND elements of the second source vector with the > corresponding inverted elements of the third source vector, then > exclusive OR the results with corresponding elements of the first > source vector. Testing: * Regression tested an aarch64-linux-gnu cross compiler configured with --with-arch=armv8.2-a+sve2, no new failures. * Bootstrap and regression test on aarch64-linux-gnu in progress. The following execution tests went from FAIL to PASS on the SVE2 regression run as a result of this change: FAIL->PASS: gcc.c-torture/execute/pr37573.c -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test FAIL->PASS: gcc.c-torture/execute/pr37573.c -O3 -g execution test FAIL->PASS: gcc.dg/torture/pr69714.c -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test FAIL->PASS: gcc.dg/torture/pr69714.c -O3 -g execution test FAIL->PASS: gcc.dg/vect/pr70021.c execution test FAIL->PASS: gcc.dg/vect/pr70021.c -flto -ffat-lto-objects execution test OK for trunk (provided patch passes bootstrap/regtest)? Thanks, Alex [1] : https://developer.arm.com/docs/ddi0602/g/a64-sve-instructions-alphabetic-order/bcax-bitwise-clear-and-exclusive-or --- gcc/ChangeLog: PR target/97730 * config/aarch64/aarch64-sve2.md (@aarch64_sve2_bcax<mode>): Change to define_expand, add missing (trivially-predicated) NOT rtx to fix wrong-code bug. (*aarch64_sve2_bcax<mode>): New. gcc/testsuite/ChangeLog: PR target/97730 * gcc.target/aarch64/sve2/bcax_1.c (OP): Add missing bitwise NOT to match correct BCAX semantics. * gcc.dg/vect/pr97730.c: New test.
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 0cafd0b690d..12dc9aaac55 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -786,17 +786,42 @@ (define_insn "@aarch64_sve2_xar<mode>" ;; ------------------------------------------------------------------------- ;; Unpredicated exclusive OR of AND. -(define_insn "@aarch64_sve2_bcax<mode>" +(define_expand "@aarch64_sve2_bcax<mode>" + [(set (match_operand:SVE_FULL_I 0 "register_operand") + (xor:SVE_FULL_I + (and:SVE_FULL_I + (unspec:SVE_FULL_I + [(match_dup 4) + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 3 "register_operand"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 2 "register_operand")) + (match_operand:SVE_FULL_I 1 "register_operand")))] + "TARGET_SVE2" + { + operands[4] = CONSTM1_RTX (<VPRED>mode); + } +) + +(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>" [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") (xor:SVE_FULL_I (and:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) + (unspec:SVE_FULL_I + [(match_operand 4) + (not:SVE_FULL_I + (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (match_operand:SVE_FULL_I 2 "register_operand" "w, w")) (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))] "TARGET_SVE2" "@ bcax\t%0.d, %0.d, %2.d, %3.d movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d" + "&& !CONSTANT_P (operands[4])" + { + operands[4] = CONSTM1_RTX (<VPRED>mode); + } [(set_attr "movprfx" "*,yes")] ) diff --git a/gcc/testsuite/gcc.dg/vect/pr97730.c b/gcc/testsuite/gcc.dg/vect/pr97730.c new file mode 100644 index 00000000000..af4bca44879 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97730.c @@ -0,0 +1,12 @@ +/* { dg-additional-options "-O1" } */ +unsigned b = 0xce8e5a48, c = 0xb849691a; +unsigned a[8080]; +int main() { + a[0] = b; + c = c; + unsigned f = 0xb1e8; + for (int h = 0; h < 5; h++) + a[h] = (b & c) 
^ f; + if (a[0] != 0x8808f9e0) + __builtin_abort(); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c index 4b0d5a9e67c..7c31afc4f19 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve2/bcax_1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ -#define OP(x,y,z) ((x) ^ ((y) & (z))) +#define OP(x,y,z) ((x) ^ (~(y) & (z))) #include "bitsel_1.c"