On Mon, Oct 20, 2025 at 11:28 PM Tamar Christina <[email protected]> wrote: > > The vectorizer has learned how to do boolean reductions of masks to a C bool > for the operations OR, XOR and AND. > > This implements the new optabs for Adv.SIMD. Adv.SIMD today can already > vectorize such loops but does so through SHIFT-AND-INSERT to perform the > reductions step-wise and in order. As an example, an OR reduction today does: > > movi v3.4s, 0 > ext v5.16b, v30.16b, v3.16b, #8 > orr v5.16b, v5.16b, v30.16b > ext v29.16b, v5.16b, v3.16b, #4 > orr v29.16b, v29.16b, v5.16b > ext v4.16b, v29.16b, v3.16b, #2 > orr v4.16b, v4.16b, v29.16b > ext v3.16b, v4.16b, v3.16b, #1 > orr v3.16b, v3.16b, v4.16b > fmov w1, s3 > and w1, w1, 1 > > For reducing to a boolean, however, we don't need the stepwise reduction and can > just look at the bit patterns. For OR, for example, we now generate: > > umaxp v3.4s, v3.4s, v3.4s > fmov x1, d3 > cmp x1, 0 > cset w0, ne > > For the remaining codegen see test vect-reduc-bool-9.c. > > Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * config/aarch64/aarch64-simd.md (reduc_sbool_and_scal_<mode>, > reduc_sbool_ior_scal_<mode>, reduc_sbool_xor_scal_<mode>): New. > * config/aarch64/iterators.md (VALLI): New. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/vect-reduc-bool-1.c: New test. > * gcc.target/aarch64/vect-reduc-bool-2.c: New test. > * gcc.target/aarch64/vect-reduc-bool-3.c: New test. > * gcc.target/aarch64/vect-reduc-bool-4.c: New test. > * gcc.target/aarch64/vect-reduc-bool-5.c: New test. > * gcc.target/aarch64/vect-reduc-bool-6.c: New test. > * gcc.target/aarch64/vect-reduc-bool-7.c: New test. > * gcc.target/aarch64/vect-reduc-bool-8.c: New test. > * gcc.target/aarch64/vect-reduc-bool-9.c: New test. 
> > --- > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > eaa8d57cc41387717affe25ec6694ec3502e3950..5eddc05b5749bbd080a085db2e15dbb9bbce3be3 > 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -3469,6 +3469,87 @@ (define_expand "reduc_plus_scal_v4sf" > DONE; > }) > > +;; AND tree reductions. > +;; Check if after a min pairwise reduction that all the lanes are 1. > +;; > +(define_expand "reduc_sbool_and_scal_<mode>" > + [(set (match_operand:QI 0 "register_operand") > + (unspec:QI [(match_operand:VALLI 1 "register_operand")] > + UNSPEC_ANDV))] > + "TARGET_SIMD" > +{ > + rtx tmp = operands[1]; > + /* For 64-bit vectors we need no reductions. */ > + if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode))) > + { > + /* Always reduce using a V4SI. */ > + rtx reduc = gen_lowpart (V4SImode, tmp); > + rtx res = gen_reg_rtx (V4SImode); > + emit_insn (gen_aarch64_uminpv4si (res, reduc, reduc)); > + emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
This is wrong: tmp is operands[1], so this emit_move_insn overwrites the value in operands[1], which might still be used later on. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123026 for a testcase. Most likely you need: tmp = gen_reg_rtx (<MODE>mode); before the emit_move_insn, so that the result goes into a fresh register instead. Thanks, Andrew > + } > + rtx val = gen_reg_rtx (DImode); > + emit_move_insn (val, gen_lowpart (DImode, tmp)); > + rtx cc_reg = aarch64_gen_compare_reg (EQ, val, constm1_rtx); > + rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, constm1_rtx); > + rtx tmp2 = gen_reg_rtx (SImode); > + emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg)); > + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); > + DONE; > +}) > + > +;; IOR tree reductions. > +;; Check that after a MAX pairwise reduction any lane is not 0 > +;; > +(define_expand "reduc_sbool_ior_scal_<mode>" > + [(set (match_operand:QI 0 "register_operand") > + (unspec:QI [(match_operand:VALLI 1 "register_operand")] > + UNSPEC_IORV))] > + "TARGET_SIMD" > +{ > + rtx tmp = operands[1]; > + /* For 64-bit vectors we need no reductions. */ > + if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode))) > + { > + /* Always reduce using a V4SI. */ > + rtx reduc = gen_lowpart (V4SImode, tmp); > + rtx res = gen_reg_rtx (V4SImode); > + emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc)); > + emit_move_insn (tmp, gen_lowpart (<MODE>mode, res)); Likewise here: this emit_move_insn also overwrites operands[1], since tmp is operands[1]. > + } > + rtx val = gen_reg_rtx (DImode); > + emit_move_insn (val, gen_lowpart (DImode, tmp)); > + rtx cc_reg = aarch64_gen_compare_reg (NE, val, const0_rtx); > + rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx); > + rtx tmp2 = gen_reg_rtx (SImode); > + emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg)); > + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); > + DONE; > +}) > + > +;; Unpredicated predicate XOR tree reductions. > +;; Check to see if the number of active lanes in the predicates is a multiple > +;; of 2. We use a normal reduction after masking with 0x1. 
> +;; > +(define_expand "reduc_sbool_xor_scal_<mode>" > + [(set (match_operand:QI 0 "register_operand") > + (unspec:QI [(match_operand:VALLI 1 "register_operand")] > + UNSPEC_XORV))] > + "TARGET_SIMD" > +{ > + rtx tmp = gen_reg_rtx (<MODE>mode); > + rtx one_reg = force_reg (<MODE>mode, CONST1_RTX (<MODE>mode)); > + emit_move_insn (tmp, gen_rtx_AND (<MODE>mode, operands[1], one_reg)); > + rtx tmp2 = gen_reg_rtx (<VEL>mode); > + emit_insn (gen_reduc_plus_scal_<mode> (tmp2, tmp)); > + rtx tmp3 = gen_reg_rtx (DImode); > + emit_move_insn (tmp3, gen_rtx_AND (DImode, > + lowpart_subreg (DImode, tmp2, <VEL>mode), > + const1_rtx)); > + emit_move_insn (operands[0], gen_lowpart (QImode, tmp2)); > + DONE; > +}) > + > ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first > ;; sign or zero-extends its elements. > (define_insn "aarch64_<su>addlv<mode>" > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index > 3757998c0ea9831b526a5bbc8568933fc05ed5d4..c369b19507a9bb06ca60e883b19823ded7c01c85 > 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -217,6 +217,9 @@ (define_mode_iterator V2F [V2SF V2DF]) > ;; All Advanced SIMD modes on which we support any arithmetic operations. > (define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF > V2DF]) > > +;; All Advanced SIMD integer modes > +(define_mode_iterator VALLI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) > + > ;; All Advanced SIMD modes suitable for moving, loading, and storing. 
> (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI > V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-1.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..c9b1c85c222e164da0f60f4774469d43036b6afc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-1.c > @@ -0,0 +1,51 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > -fdump-tree-vect-details" }*/ > + > +char p[128]; > + > +bool __attribute__((noipa)) > +fand (int n) > +{ > + bool r = true; > + for (int i = 0; i < n; ++i) > + r &= (p[i] != 0); > + return r; > +} > + > +bool __attribute__((noipa)) > +fior (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r |= (p[i] != 0); > + return r; > +} > + > +int main() > +{ > + __builtin_memset (p, 1, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (!fand (n)) > + __builtin_abort (); > + > + p[0] = 0; > + for (int n = 1; n < 77; ++n) > + if (fand (n)) > + __builtin_abort (); > + > + __builtin_memset (p, 0, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (fior (n)) > + __builtin_abort (); > + > + p[0] = 1; > + for (int n = 1; n < 77; ++n) > + if (!fior (n)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-2.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-2.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..598d6c71ec84bc7327b01ff94e51f4a213f07ff6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-2.c > @@ -0,0 +1,51 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > -fdump-tree-vect-details" }*/ > + > +short p[128]; > + > +bool __attribute__((noipa)) > +fand (int n) > +{ > + 
bool r = true; > + for (int i = 0; i < n; ++i) > + r &= (p[i] != 0); > + return r; > +} > + > +bool __attribute__((noipa)) > +fior (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r |= (p[i] != 0); > + return r; > +} > + > +int main() > +{ > + __builtin_memset (p, 1, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (!fand (n)) > + __builtin_abort (); > + > + p[0] = 0; > + for (int n = 1; n < 77; ++n) > + if (fand (n)) > + __builtin_abort (); > + > + __builtin_memset (p, 0, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (fior (n)) > + __builtin_abort (); > + > + p[0] = 1; > + for (int n = 1; n < 77; ++n) > + if (!fior (n)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-3.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-3.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..9517965753a7cfdd06b05d9298a14db4bb7112f9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-3.c > @@ -0,0 +1,51 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > -fdump-tree-vect-details" }*/ > + > +int p[128]; > + > +bool __attribute__((noipa)) > +fand (int n) > +{ > + bool r = true; > + for (int i = 0; i < n; ++i) > + r &= (p[i] != 0); > + return r; > +} > + > +bool __attribute__((noipa)) > +fior (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r |= (p[i] != 0); > + return r; > +} > + > +int main() > +{ > + __builtin_memset (p, 1, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (!fand (n)) > + __builtin_abort (); > + > + p[0] = 0; > + for (int n = 1; n < 77; ++n) > + if (fand (n)) > + __builtin_abort (); > + > + __builtin_memset (p, 0, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (fior (n)) > + __builtin_abort (); > + > + p[0] = 1; > + for (int n = 1; n < 77; ++n) > + if 
(!fior (n)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-4.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-4.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..3cd577f5ed5929dab45da1e2a23d7af197065767 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-4.c > @@ -0,0 +1,51 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > -fdump-tree-vect-details" }*/ > + > +long long p[128]; > + > +bool __attribute__((noipa)) > +fand (int n) > +{ > + bool r = true; > + for (int i = 0; i < n; ++i) > + r &= (p[i] != 0); > + return r; > +} > + > +bool __attribute__((noipa)) > +fior (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r |= (p[i] != 0); > + return r; > +} > + > +int main() > +{ > + __builtin_memset (p, 1, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (!fand (n)) > + __builtin_abort (); > + > + p[0] = 0; > + for (int n = 1; n < 77; ++n) > + if (fand (n)) > + __builtin_abort (); > + > + __builtin_memset (p, 0, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (fior (n)) > + __builtin_abort (); > + > + p[0] = 1; > + for (int n = 1; n < 77; ++n) > + if (!fior (n)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-5.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-5.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..c6fa63b7657ea8a176442b7609b10caf771ecbcf > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-5.c > @@ -0,0 +1,49 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > -fdump-tree-vect-details" }*/ > + > +char p[128]; > + > +bool 
__attribute__((noipa)) > +fxort (int n) > +{ > + bool r = true; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +bool __attribute__((noipa)) > +fxorf (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +int main() > +{ > + __builtin_memset (p, 1, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (fxort (n) != !(n & 1)) > + __builtin_abort (); > + > + for (int n = 0; n < 77; ++n) > + if (fxorf (n) != (n & 1)) > + __builtin_abort (); > + > + __builtin_memset (p, 0, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (!fxort (n)) > + __builtin_abort (); > + > + for (int n = 0; n < 77; ++n) > + if (fxorf (n)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-6.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-6.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..6d12e6a7cb4fd45bd43165f35cae68a4762f307b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-6.c > @@ -0,0 +1,49 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > -fdump-tree-vect-details" }*/ > + > +short p[128]; > + > +bool __attribute__((noipa)) > +fxort (int n) > +{ > + bool r = true; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +bool __attribute__((noipa)) > +fxorf (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +int main() > +{ > + __builtin_memset (p, 1, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (fxort (n) != !(n & 1)) > + __builtin_abort (); > + > + for (int n = 0; n < 77; ++n) > + if (fxorf (n) != (n & 1)) > + __builtin_abort (); > + > + __builtin_memset (p, 0, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (!fxort (n)) > + 
__builtin_abort (); > + > + for (int n = 0; n < 77; ++n) > + if (fxorf (n)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-7.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-7.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..58d6a785f9a0c23d3745927ffc2b9df16dfe2ae4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-7.c > @@ -0,0 +1,49 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > -fdump-tree-vect-details" }*/ > + > +int p[128]; > + > +bool __attribute__((noipa)) > +fxort (int n) > +{ > + bool r = true; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +bool __attribute__((noipa)) > +fxorf (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +int main() > +{ > + __builtin_memset (p, 1, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (fxort (n) != !(n & 1)) > + __builtin_abort (); > + > + for (int n = 0; n < 77; ++n) > + if (fxorf (n) != (n & 1)) > + __builtin_abort (); > + > + __builtin_memset (p, 0, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (!fxort (n)) > + __builtin_abort (); > + > + for (int n = 0; n < 77; ++n) > + if (fxorf (n)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } > } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-8.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-8.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..18ad94a4bd7fe7c87ca0c32cd93f7aee4937cd39 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-8.c > @@ -0,0 +1,49 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > 
-fdump-tree-vect-details" }*/ > + > +long long p[128]; > + > +bool __attribute__((noipa)) > +fxort (int n) > +{ > + bool r = true; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +bool __attribute__((noipa)) > +fxorf (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +int main() > +{ > + __builtin_memset (p, 1, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (fxort (n) != !(n & 1)) > + __builtin_abort (); > + > + for (int n = 0; n < 77; ++n) > + if (fxorf (n) != (n & 1)) > + __builtin_abort (); > + > + __builtin_memset (p, 0, sizeof(p)); > + > + for (int n = 0; n < 77; ++n) > + if (!fxort (n)) > + __builtin_abort (); > + > + for (int n = 0; n < 77; ++n) > + if (fxorf (n)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { > target { vect_int && vect_condition } } } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-9.c > b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-9.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..7d9a82f5fc3a8104e8fecdebe13cc1bacc6a798a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-9.c > @@ -0,0 +1,63 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only > -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 > -fdump-tree-vect-details" }*/ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +char p[128]; > + > +/* > +** fand: > +** ... > +** uminp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s > +** fmov x[0-9]+, d[0-9]+ > +** cmn x[0-9]+, #1 > +** cset w[0-9]+, eq > +** ... > +*/ > +bool __attribute__((noipa)) > +fand (int n) > +{ > + bool r = true; > + for (int i = 0; i < n; ++i) > + r &= (p[i] != 0); > + return r; > +} > + > +/* > +** fior: > +** ... 
> +** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s > +** fmov x[0-9]+, d[0-9]+ > +** cmp x[0-9]+, 0 > +** cset w[0-9]+, ne > +** ... > +*/ > +bool __attribute__((noipa)) > +fior (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r |= (p[i] != 0); > + return r; > +} > + > +/* > +** fxor: > +** ... > +** movi v[0-9]+.16b, 0x1 > +** and v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b > +** addv b[0-9]+, v[0-9]+.16b > +** fmov w[0-9]+, s[0-9]+ > +** and w[0-9]+, w[0-9]+, 1 > +** ... > +*/ > +bool __attribute__((noipa)) > +fxor (int n) > +{ > + bool r = false; > + for (int i = 0; i < n; ++i) > + r ^= (p[i] != 0); > + return r; > +} > + > +/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 3 "vect" } > } */ > + > > > --
