Tamar Christina <tamar.christ...@arm.com> writes:
>> I see you've changed it from:
>>
>> +  rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
>> +  rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
>> +  emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
>>
>> to:
>>
>> +  emit_jump_insn (gen_cbranchdi4 (operands[0], val, CONST0_RTX (DImode),
>> +				  operands[3]));
>>
>> Was that to fix a specific problem?  The original looked OK to me
>> for that part (it was the vector comparison that I was asking about).
>>
>
> No, it was to be more consistent with the Arm and MVE patches.
>
> Note that I may update the tests to disable scheduling.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-simd.md (cbranch<mode>4): New.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/vect-early-break-cbranch.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index c6f2d5828373f2a5272b9d1227bfe34365f9fd09..309ec9535294d6e9cdc530f71d9fe38bb916c966 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3911,6 +3911,45 @@ (define_expand "vcond_mask_<mode><v_int_equiv>"
>    DONE;
> })
>
> +;; Patterns comparing two vectors and conditionally jump
> +
> +(define_expand "cbranch<mode>4"
> +  [(set (pc)
> +	(if_then_else
> +	  (match_operator 0 "aarch64_equality_operator"
> +	    [(match_operand:VDQ_I 1 "register_operand")
> +	     (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")])
> +	  (label_ref (match_operand 3 ""))
> +	  (pc)))]
> +  "TARGET_SIMD"
> +{
> +  auto code = GET_CODE (operands[0]);
> +  rtx tmp = operands[1];
> +
> +  /* If comparing against a non-zero vector we have to do a comparison first
...an EOR first (or XOR)

OK with that change, thanks.

Richard

> +     so we can have a != 0 comparison with the result.  */
> +  if (operands[2] != CONST0_RTX (<MODE>mode))
> +    {
> +      tmp = gen_reg_rtx (<MODE>mode);
> +      emit_insn (gen_xor<mode>3 (tmp, operands[1], operands[2]));
> +    }
> +
> +  /* For 64-bit vectors we need no reductions.  */
> +  if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
> +    {
> +      /* Always reduce using a V4SI.  */
> +      rtx reduc = gen_lowpart (V4SImode, tmp);
> +      rtx res = gen_reg_rtx (V4SImode);
> +      emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
> +      emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
> +    }
> +
> +  rtx val = gen_reg_rtx (DImode);
> +  emit_move_insn (val, gen_lowpart (DImode, tmp));
> +
> +  rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
> +  rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
> +  emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
> +  DONE;
> +})
> +
>  ;; Patterns comparing two vectors to produce a mask.
>
>  (define_expand "vec_cmp<mode><mode>"
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..c0363c3787270507d7902bb2ac0e39faef63a852
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c
> @@ -0,0 +1,124 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
> +
> +#pragma GCC target "+nosve"
> +
> +#define N 640
> +int a[N] = {0};
> +int b[N] = {0};
> +
> +
> +/*
> +** f1:
> +**	...
> +**	cmgt	v[0-9]+.4s, v[0-9]+.4s, #0
> +**	umaxp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**	fmov	x[0-9]+, d[0-9]+
> +**	cbnz	x[0-9]+, \.L[0-9]+
> +**	...
> +*/
> +void f1 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] > 0)
> +	break;
> +    }
> +}
> +
> +/*
> +** f2:
> +**	...
> +**	cmge	v[0-9]+.4s, v[0-9]+.4s, #0
> +**	umaxp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**	fmov	x[0-9]+, d[0-9]+
> +**	cbnz	x[0-9]+, \.L[0-9]+
> +**	...
> +*/
> +void f2 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] >= 0)
> +	break;
> +    }
> +}
> +
> +/*
> +** f3:
> +**	...
> +**	cmeq	v[0-9]+.4s, v[0-9]+.4s, #0
> +**	umaxp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**	fmov	x[0-9]+, d[0-9]+
> +**	cbnz	x[0-9]+, \.L[0-9]+
> +**	...
> +*/
> +void f3 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] == 0)
> +	break;
> +    }
> +}
> +
> +/*
> +** f4:
> +**	...
> +**	cmtst	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**	umaxp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**	fmov	x[0-9]+, d[0-9]+
> +**	cbnz	x[0-9]+, \.L[0-9]+
> +**	...
> +*/
> +void f4 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] != 0)
> +	break;
> +    }
> +}
> +
> +/*
> +** f5:
> +**	...
> +**	cmlt	v[0-9]+.4s, v[0-9]+.4s, #0
> +**	umaxp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**	fmov	x[0-9]+, d[0-9]+
> +**	cbnz	x[0-9]+, \.L[0-9]+
> +**	...
> +*/
> +void f5 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] < 0)
> +	break;
> +    }
> +}
> +
> +/*
> +** f6:
> +**	...
> +**	cmle	v[0-9]+.4s, v[0-9]+.4s, #0
> +**	umaxp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**	fmov	x[0-9]+, d[0-9]+
> +**	cbnz	x[0-9]+, \.L[0-9]+
> +**	...
> +*/
> +void f6 ()
> +{
> +  for (int i = 0; i < N; i++)
> +    {
> +      b[i] += a[i];
> +      if (a[i] <= 0)
> +	break;
> +    }
> +}
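
For reference, putting the pieces of the expander together: handed a 128-bit
a != b branch where b is not the zero vector, the generated sequence should
look roughly like the following (register numbers and the label are
illustrative only, not taken from the patch):

	eor	v0.16b, v0.16b, v1.16b	// a != b  <=>  (a ^ b) != 0
	umaxp	v0.4s, v0.4s, v0.4s	// low 64 bits now hold the max of all lanes
	fmov	x0, d0			// move the reduced mask to a GP register
	cbnz	x0, .L1			// branch if the vectors differ in any lane

For 64-bit vectors the umaxp reduction is skipped, and for a comparison
against zero the eor is omitted as well; the f1-f6 tests above only exercise
the zero case, where the mask fed to the branch comes from a separate vector
comparison (the cm* instruction in each expected body).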