https://gcc.gnu.org/g:2f7d90ef65c6f09106c18b99a9590b8f81933115
commit r16-5587-g2f7d90ef65c6f09106c18b99a9590b8f81933115 Author: Tamar Christina <[email protected]> Date: Tue Nov 25 12:52:56 2025 +0000 AArch64: Implement {cond_}vec_cbranch_{any|all} [PR118974] The following example: #define N 640 int a[N] = {}; int b[N] = {}; int c[N] = {}; void f1 (int d) { for (int i = 0; i < N; i++) { b[i] += a[i]; if (a[i] != d) break; } } today generates with -Ofast -march=armv8-a+sve --param aarch64-autovec-preference=asimd-only .L6: ldr q30, [x3, x1] cmeq v31.4s, v30.4s, v27.4s not v31.16b, v31.16b umaxp v31.4s, v31.4s, v31.4s fmov x4, d31 cbz x4, .L2 Where we use an Adv. SIMD compare and a reduction sequence to implement early break. This patch implements the new optabs vec_cbranch_any and vec_cbranch_all in order to replace the Adv. SIMD compare and reduction with an SVE flag-setting compare. With this patch the above generates: ptrue p7.b, vl16 .L6: ldr q30, [x3, x1] cmpne p15.s, p7/z, z30.s, z27.s b.none .L2 This optab could also be used for optimizing the Adv. SIMD sequence when SVE is not available. I have a separate patch for that and will send it depending on whether this approach is accepted or not. Note that for floating-point we still need the ptest as floating point SVE compares don't set flags. In addition, because SVE doesn't have a CMTST equivalent instruction we have to do an explicit AND before the compares. These two cases don't have a speed advantage, but do have a codesize one so I've left them enabled. This patch also eliminates the PTEST on normal SVE compare and branch through the introduction of new optabs cond_vec_cbranch_any and cond_vec_cbranch_all. In the example void f1 () { for (int i = 0; i < N; i++) { b[i] += a[i]; if (a[i] > 0) break; } } when compiled for SVE we generate: ld1w z28.s, p7/z, [x4, x0, lsl 2] cmpgt p14.s, p7/z, z28.s, #0 ptest p15, p14.b b.none .L3 Where the ptest isn't needed since the branch only cares about the Z and NZ flags.
GCC today supports eliding this through the pattern *cmp<cmp_op><mode>_ptest; however, this pattern only supports the removal when the outermost context is a CMP where the predicate is inside the condition itself. This typically only happens for an unpredicated CMP as a ptrue will be generated during expand. In the case above, at the GIMPLE level we have mask_patt_14.15_57 = vect__2.11_52 > { 0, ... }; vec_mask_and_58 = loop_mask_48 & mask_patt_14.15_57; if (vec_mask_and_58 != { 0, ... }) goto <bb 5>; [5.50%] else goto <bb 6>; [94.50%] where the loop mask is applied to the compare as an AND. The loop mask is moved into the compare by the pattern *cmp<cmp_op><mode>_and which moves the mask inside if the current mask is a ptrue since p && true -> p. However this happens after combine, and so we can't both move the predicate inside AND eliminate the ptests. To fix this the middle-end will now rewrite the mask into the compare optab and indicate that only the CC flags are required. This allows us to simply not generate the ptest at all, rather than trying to eliminate it later on. After this patch we generate ld1w z28.s, p7/z, [x4, x0, lsl 2] cmpgt p14.s, p7/z, z28.s, #0 b.none .L3 gcc/ChangeLog: PR target/118974 * config/aarch64/aarch64-simd.md (xor<mode>3<vczle><vczbe>): Rename ... (@xor<mode>3<vczle><vczbe>): .. to this. (cbranch<mode>4): Update comments. (<optab><mode>): New. * config/aarch64/aarch64-sve.md (cbranch<mode>4): Update comment. (<optab><mode>): New. (aarch64_ptest<mode>): Rename to ... (@aarch64_ptest<mode>): .. this. * config/aarch64/iterators.md (UNSPEC_CMP_ALL, UNSPEC_CMP_ANY, UNSPEC_COND_CMP_ALL, UNSPEC_COND_CMP_ANY): New. (optabs): Add them. (CBRANCH_CMP, COND_CBRANCH_CMP, cbranch_op): New. * config/aarch64/predicates.md (aarch64_cbranch_compare_operation): New. gcc/testsuite/ChangeLog: PR target/118974 * gcc.target/aarch64/sve/pr119351.c: Update codegen. * gcc.target/aarch64/sve/vect-early-break-cbranch.c: Likewise.
* gcc.target/aarch64/vect-early-break-cbranch.c: Likewise. * gcc.target/aarch64/sve/vect-early-break-cbranch_2.c: New test. * gcc.target/aarch64/sve/vect-early-break-cbranch_3.c: New test. * gcc.target/aarch64/sve/vect-early-break-cbranch_4.c: New test. * gcc.target/aarch64/sve/vect-early-break-cbranch_5.c: New test. * gcc.target/aarch64/sve/vect-early-break-cbranch_7.c: New test. * gcc.target/aarch64/sve/vect-early-break-cbranch_8.c: New test. * gcc.target/aarch64/vect-early-break-cbranch_2.c: New test. * gcc.target/aarch64/vect-early-break-cbranch_3.c: New test. Diff: --- gcc/config/aarch64/aarch64-simd.md | 81 +++++++++++- gcc/config/aarch64/aarch64-sve.md | 75 ++++++++++- gcc/config/aarch64/iterators.md | 24 +++- gcc/config/aarch64/predicates.md | 7 + gcc/testsuite/gcc.target/aarch64/sve/pr119351.c | 1 - .../aarch64/sve/vect-early-break-cbranch.c | 18 +-- .../aarch64/sve/vect-early-break-cbranch_2.c | 132 ++++++++++++++++++ .../aarch64/sve/vect-early-break-cbranch_3.c | 132 ++++++++++++++++++ .../aarch64/sve/vect-early-break-cbranch_4.c | 132 ++++++++++++++++++ .../aarch64/sve/vect-early-break-cbranch_5.c | 132 ++++++++++++++++++ .../aarch64/sve/vect-early-break-cbranch_7.c | 147 +++++++++++++++++++++ .../aarch64/sve/vect-early-break-cbranch_8.c | 132 ++++++++++++++++++ .../gcc.target/aarch64/vect-early-break-cbranch.c | 2 - .../aarch64/vect-early-break-cbranch_2.c | 105 +++++++++++++++ .../aarch64/vect-early-break-cbranch_3.c | 112 ++++++++++++++++ 15 files changed, 1213 insertions(+), 19 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 1d2248ab57f5..3f9d5f6295bc 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1155,7 +1155,7 @@ ) ;; For EOR (vector, register) and SVE EOR (vector, immediate) -(define_insn "xor<mode>3<vczle><vczbe>" +(define_insn "@xor<mode>3<vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand") (xor:VDQ_I (match_operand:VDQ_I 1 
"register_operand") (match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))] @@ -4156,6 +4156,8 @@ ;; Patterns comparing two vectors and conditionally jump +;; Define cbranch on masks. This optab is only called for BOOLEAN_VECTOR_TYPE_P +;; which allows optimizing compares with zero. (define_expand "cbranch<mode>4" [(set (pc) (if_then_else @@ -4196,6 +4198,83 @@ DONE; }) +;; Define vec_cbranch_any and vec_cbranch_all +;; Vector comparison and branch for Adv. SIMD Integer types using SVE +;; instructions. +(define_expand "<optab><mode>" + [(set (pc) + (unspec:VALL + [(if_then_else + (match_operator 0 "aarch64_cbranch_compare_operation" + [(match_operand:VALL 1 "register_operand") + (match_operand:VALL 2 "aarch64_simd_reg_or_zero")]) + (label_ref (match_operand 3 "")) + (pc))] + CBRANCH_CMP))] + "TARGET_SIMD" +{ + auto code = GET_CODE (operands[0]); + if (TARGET_SVE) + { + machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require (); + + rtx in1 = force_lowpart_subreg (full_mode, operands[1], <MODE>mode); + rtx in2; + if (CONST0_RTX (<MODE>mode) == operands[2]) + in2 = CONST0_RTX (full_mode); + else + in2 = force_lowpart_subreg (full_mode, operands[2], <MODE>mode); + + unsigned lanes + = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant (); + machine_mode pred_mode = aarch64_sve_pred_mode (full_mode); + rtx ptrue = aarch64_ptrue_reg (VNx16BImode, lanes); + rtx hint = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode); + + rtx tmp = gen_reg_rtx (pred_mode); + rtx cast_ptrue = gen_lowpart (pred_mode, ptrue); + + if (FLOAT_MODE_P (full_mode)) + { + aarch64_expand_sve_vec_cmp<sve_cmp_suff> (tmp, code, in1, in2); + emit_insn (gen_and3 (pred_mode, tmp, tmp, cast_ptrue)); + emit_insn (gen_aarch64_ptest (pred_mode, ptrue, cast_ptrue, hint, + tmp)); + } + else + emit_insn (gen_aarch64_pred_cmp_ptest (code, full_mode, tmp, ptrue, in1, + in2, cast_ptrue, hint, + cast_ptrue, hint)); + + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp_reg = gen_rtx_<cbranch_op> 
(VOIDmode, cc_reg, const0_rtx); + emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3])); + DONE; + } + + rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode); + emit_insn (gen_vec_cmp<mode><v_int_equiv> (tmp, operands[0], operands[1], + operands[2])); + + /* For 128-bit vectors we need a reduction to 64-bit first. */ + if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode))) + { + /* Always reduce using a V4SI. */ + rtx reduc = gen_lowpart (V4SImode, tmp); + rtx res = gen_reg_rtx (V4SImode); + emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc)); + emit_move_insn (tmp, gen_lowpart (<V_INT_EQUIV>mode, res)); + } + + rtx val = gen_reg_rtx (DImode); + emit_move_insn (val, gen_lowpart (DImode, tmp)); + + rtx cc_reg = aarch64_gen_compare_reg (<cbranch_op>, val, const0_rtx); + rtx cmp_rtx = gen_rtx_fmt_ee (<cbranch_op>, DImode, cc_reg, const0_rtx); + emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[3])); + DONE; +}) + ;; Patterns comparing two vectors to produce a mask. (define_expand "vec_cmp<mode><mode>" diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 26c08dbd9208..6d2e87c71e6d 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -9803,7 +9803,8 @@ ;; - PTEST ;; ------------------------------------------------------------------------- -;; Branch based on predicate equality or inequality. +;; Branch based on predicate equality or inequality. This allows PTEST to be +;; combined with other flag setting instructions like ORR -> ORRS. (define_expand "cbranch<mode>4" [(set (pc) (if_then_else @@ -9832,8 +9833,78 @@ } ) +;; Define vec_cbranch_any and vec_cbranch_all +;; Branch based on predicate equality or inequality. 
+(define_expand "<optab><mode>" + [(set (pc) + (unspec:PRED_ALL + [(if_then_else + (match_operator 0 "aarch64_equality_operator" + [(match_operand:PRED_ALL 1 "register_operand") + (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) + (label_ref (match_operand 3 "")) + (pc))] + CBRANCH_CMP))] + "" + { + rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>)); + rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue); + rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); + rtx pred; + if (operands[2] == CONST0_RTX (<MODE>mode)) + pred = operands[1]; + else + { + pred = gen_reg_rtx (<MODE>mode); + emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1], + operands[2])); + } + emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred)); + + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx); + emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3])); + DONE; + } +) + +;; Define cond_vec_cbranch_any and cond_vec_cbranch_all +;; Vector comparison and branch for SVE Floating points types instructions. +;; But only on EQ or NE comparisons, which allows us to use integer compares +;; instead and about the ptest. 
+(define_expand "<optab><mode>" + [(set (pc) + (unspec:SVE_I + [(if_then_else + (match_operator 0 "aarch64_comparison_operator" + [(match_operand:<VPRED> 1 "register_operand") + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]) + (label_ref (match_operand 4 "")) + (pc))] + COND_CBRANCH_CMP))] + "" +{ + auto code = GET_CODE (operands[0]); + rtx in1 = operands[2]; + rtx in2 = operands[3]; + + rtx res = gen_reg_rtx (<VPRED>mode); + rtx gp = gen_lowpart (VNx16BImode, operands[1]); + rtx cast_gp = operands[1]; + rtx flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode); + + emit_insn (gen_aarch64_pred_cmp_ptest (code, <MODE>mode, res, gp, in1, in2, + cast_gp, flag, cast_gp, flag)); + + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx); + emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[4])); + DONE; +}) + ;; See "Description of UNSPEC_PTEST" above for details. -(define_insn "aarch64_ptest<mode>" +(define_insn "@aarch64_ptest<mode>" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") (match_operand 1) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 0c80b7adeaef..82579b05ff7a 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -841,6 +841,10 @@ UNSPEC_SSHLL ; Used in aarch64-simd.md. UNSPEC_USHLL ; Used in aarch64-simd.md. UNSPEC_ADDP ; Used in aarch64-simd.md. + UNSPEC_CMP_ALL ; Used in aarch64-simd.md. + UNSPEC_CMP_ANY ; Used in aarch64-simd.md. + UNSPEC_COND_CMP_ALL ; Used in aarch64-simd.md. + UNSPEC_COND_CMP_ANY ; Used in aarch64-simd.md. UNSPEC_TBL ; Used in vector permute patterns. UNSPEC_TBLQ ; Used in vector permute patterns. UNSPEC_TBX ; Used in vector permute patterns. 
@@ -2628,6 +2632,12 @@ (VNx16SI "vnx4bi") (VNx16SF "vnx4bi") (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")]) +;; Map mode to suffix for using an SVE comparison +(define_mode_attr sve_cmp_suff [(V8QI "_int") (V16QI "_int") + (V4HI "_int") (V8HI "_int") (V2SI "_int") + (V4SI "_int") (V2DI "_int") + (V2SF "_float") (V4SF "_float") (V2DF "_float")]) + (define_mode_attr VDOUBLE [(VNx16QI "VNx32QI") (VNx8HI "VNx16HI") (VNx8HF "VNx16HF") (VNx8BF "VNx16BF") @@ -3288,6 +3298,9 @@ (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD]) +(define_int_iterator CBRANCH_CMP [UNSPEC_CMP_ALL UNSPEC_CMP_ANY]) +(define_int_iterator COND_CBRANCH_CMP [UNSPEC_COND_CMP_ALL UNSPEC_COND_CMP_ANY]) + (define_int_iterator BSL_DUP [1 2]) (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) @@ -4231,7 +4244,16 @@ (UNSPEC_COND_SCVTF "float") (UNSPEC_COND_SMAX "smax") (UNSPEC_COND_SMIN "smin") - (UNSPEC_COND_UCVTF "floatuns")]) + (UNSPEC_COND_UCVTF "floatuns") + (UNSPEC_CMP_ALL "vec_cbranch_all") + (UNSPEC_CMP_ANY "vec_cbranch_any") + (UNSPEC_COND_CMP_ALL "cond_vec_cbranch_all") + (UNSPEC_COND_CMP_ANY "cond_vec_cbranch_any")]) + +(define_int_attr cbranch_op [(UNSPEC_CMP_ALL "EQ") + (UNSPEC_CMP_ANY "NE") + (UNSPEC_COND_CMP_ALL "EQ") + (UNSPEC_COND_CMP_ANY "NE")]) (define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan") (UNSPEC_FMAXNM "fmax") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index f53591d4b045..de1d7d836485 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -481,6 +481,13 @@ (define_special_predicate "aarch64_equality_operator" (match_code "eq,ne")) +(define_special_predicate "aarch64_cbranch_compare_operation" + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered, + ordered,unlt,unle,unge,ungt") +{ + return TARGET_SIMD; +}) + (define_special_predicate "aarch64_carry_operation" (match_code "ltu,geu") { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c index 
85aab355f95f..1ebc735a82f4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr119351.c @@ -14,7 +14,6 @@ int x[N] __attribute__((aligned(32))); ** ... ** ld1w z[0-9]+.s, p[0-9]+/z, \[x[0-9], x[0-9], lsl 2\] ** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b ** ... */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c index d7cef1105410..48fb407ccee7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch.c @@ -8,8 +8,7 @@ int b[N] = {0}; ** f1: ** ... ** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b -** b.(any|none) \.L[0-9]+ +** b(\.?eq|\.none) \.L[0-9]+ ** ... */ void f1 () @@ -25,8 +24,7 @@ void f1 () ** f2: ** ... ** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b -** b.(any|none) \.L[0-9]+ +** b(\.?eq|\.none) \.L[0-9]+ ** ... */ void f2 () @@ -42,8 +40,7 @@ void f2 () ** f3: ** ... ** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b -** b.(any|none) \.L[0-9]+ +** b(\.?eq|\.none) \.L[0-9]+ ** ... */ void f3 () @@ -59,8 +56,7 @@ void f3 () ** f4: ** ... ** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b -** b.(any|none) \.L[0-9]+ +** b(\.?eq|\.none) \.L[0-9]+ ** ... */ void f4 () @@ -76,8 +72,7 @@ void f4 () ** f5: ** ... ** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b -** b.(any|none) .L[0-9]+ +** b(\.?eq|\.none) .L[0-9]+ ** ... */ void f5 () @@ -93,8 +88,7 @@ void f5 () ** f6: ** ... ** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 -** ptest p[0-9]+, p[0-9]+.b -** b.(any|none) \.L[0-9]+ +** b(\.?eq|\.none) \.L[0-9]+ ** ... 
*/ void f6 () diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c new file mode 100644 index 000000000000..15e9b29a6333 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_2.c @@ -0,0 +1,132 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 --param aarch64-autovec-preference=asimd-only" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdio.h> + +#define N 640 +#ifndef TYPE +#define TYPE int +#endif +#ifndef FMT +#define FMT "d" +#endif + + +TYPE a[N] = {0}; +TYPE b[N] = {0}; + +char *curr_test; + +/* Macro to define a function with a specific comparison */ +#define DEFINE_TEST_FUNC(NAME, OP) \ + __attribute__((noipa)) \ + void NAME(void) { \ + for (int i = 0; i < N; i++) { \ + b[i] += a[i]; \ + if (a[i] OP 0) \ + break; \ + } \ + } + +/* Generate the six comparisons functions using the macro. */ +DEFINE_TEST_FUNC(f1, >) +DEFINE_TEST_FUNC(f2, >=) +DEFINE_TEST_FUNC(f3, ==) +DEFINE_TEST_FUNC(f4, !=) +DEFINE_TEST_FUNC(f5, <) +DEFINE_TEST_FUNC(f6, <=) + +__attribute__((noreturn)) +static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected) +{ + printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n", + m, __FILE__, __LINE__, curr_test, expected, result, i); + __builtin_abort (); +} + +/* Array setup macro. */ +#define RESET_ARRAYS(_aval, _idx, _force, _bval) \ + do { \ + _Pragma("GCC novector") \ + for (int i = 0; i < N; ++i) { \ + a[i] = _aval; \ + b[i] = _bval; \ + } \ + if (_idx >= 0 && _idx < N) \ + a[_idx] = _force; \ + } while (0) + +/* Value check macros. 
*/ +#define CHECK_EQ(_i, _val) \ + do { \ + if (b[_i] != _val) \ + __abort_trace ("single", _i, b[_i], _val); \ + } while (0) + +#define CHECK_RANGE_EQ(_start, _end, _val) \ + do { \ + _Pragma("GCC novector") \ + for (int i = _start; i < _end; ++i) \ + if (b[i] != _val) \ + __abort_trace ("range", i, b[i], _val); \ + } while (0) + +#define str(s) #s +#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \ + do { \ + curr_test = str (_func); \ + RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \ + _func(); \ + _check_stmt; \ + } while (0) + +int main(void) { + /* Break on random intervals. */ + TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10)); + TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10)); + TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0)); + TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1)); + TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5)); + TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7)); + + /* Break on last iteration. */ + TEST_FUNC (f1, 0, N-1, 1, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2)); + + TEST_FUNC (f2, -5, N-1, 0, 9, + CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9)); + + TEST_FUNC (f3, 2, N-1, 0, 0, + CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0)); + + TEST_FUNC (f4, 0, N-1, 2, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f5, 2, N-1, -3, 6, + CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f6, 5, N-1, 0, 7, + CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7)); + + /* Condition never met — full loop executes. 
*/ + TEST_FUNC (f1, 0, -1, 0, 2, + CHECK_RANGE_EQ (0, N, 2)); + + TEST_FUNC (f2, -2, -1, 0, 5, + CHECK_RANGE_EQ (0, N, 3)); + + TEST_FUNC (f3, 1, -1, 0, 0, + CHECK_RANGE_EQ (0, N, 1)); + + TEST_FUNC (f4, 0, -1, 0, 7, + CHECK_RANGE_EQ (0, N, 7)); + + TEST_FUNC (f5, 1, -1, 0, 4, + CHECK_RANGE_EQ (0, N, 5)); + + TEST_FUNC (f6, 5, -1, 0, 3, + CHECK_RANGE_EQ (0, N, 8)); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c new file mode 100644 index 000000000000..da0d90abd731 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_3.c @@ -0,0 +1,132 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 --param aarch64-autovec-preference=sve-only" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdio.h> + +#define N 640 +#ifndef TYPE +#define TYPE int +#endif +#ifndef FMT +#define FMT "d" +#endif + + +TYPE a[N] = {0}; +TYPE b[N] = {0}; + +char *curr_test; + +/* Macro to define a function with a specific comparison */ +#define DEFINE_TEST_FUNC(NAME, OP) \ + __attribute__((noipa)) \ + void NAME(void) { \ + for (int i = 0; i < N; i++) { \ + b[i] += a[i]; \ + if (a[i] OP 0) \ + break; \ + } \ + } + +/* Generate the six comparisons functions using the macro. */ +DEFINE_TEST_FUNC(f1, >) +DEFINE_TEST_FUNC(f2, >=) +DEFINE_TEST_FUNC(f3, ==) +DEFINE_TEST_FUNC(f4, !=) +DEFINE_TEST_FUNC(f5, <) +DEFINE_TEST_FUNC(f6, <=) + +__attribute__((noreturn)) +static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected) +{ + printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n", + m, __FILE__, __LINE__, curr_test, expected, result, i); + __builtin_abort (); +} + +/* Array setup macro. 
*/ +#define RESET_ARRAYS(_aval, _idx, _force, _bval) \ + do { \ + _Pragma("GCC novector") \ + for (int i = 0; i < N; ++i) { \ + a[i] = _aval; \ + b[i] = _bval; \ + } \ + if (_idx >= 0 && _idx < N) \ + a[_idx] = _force; \ + } while (0) + +/* Value check macros. */ +#define CHECK_EQ(_i, _val) \ + do { \ + if (b[_i] != _val) \ + __abort_trace ("single", _i, b[_i], _val); \ + } while (0) + +#define CHECK_RANGE_EQ(_start, _end, _val) \ + do { \ + _Pragma("GCC novector") \ + for (int i = _start; i < _end; ++i) \ + if (b[i] != _val) \ + __abort_trace ("range", i, b[i], _val); \ + } while (0) + +#define str(s) #s +#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \ + do { \ + curr_test = str (_func); \ + RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \ + _func(); \ + _check_stmt; \ + } while (0) + +int main(void) { + /* Break on random intervals. */ + TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10)); + TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10)); + TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0)); + TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1)); + TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5)); + TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7)); + + /* Break on last iteration. */ + TEST_FUNC (f1, 0, N-1, 1, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2)); + + TEST_FUNC (f2, -5, N-1, 0, 9, + CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9)); + + TEST_FUNC (f3, 2, N-1, 0, 0, + CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0)); + + TEST_FUNC (f4, 0, N-1, 2, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f5, 2, N-1, -3, 6, + CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f6, 5, N-1, 0, 7, + CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7)); + + /* Condition never met — full loop executes. 
*/ + TEST_FUNC (f1, 0, -1, 0, 2, + CHECK_RANGE_EQ (0, N, 2)); + + TEST_FUNC (f2, -2, -1, 0, 5, + CHECK_RANGE_EQ (0, N, 3)); + + TEST_FUNC (f3, 1, -1, 0, 0, + CHECK_RANGE_EQ (0, N, 1)); + + TEST_FUNC (f4, 0, -1, 0, 7, + CHECK_RANGE_EQ (0, N, 7)); + + TEST_FUNC (f5, 1, -1, 0, 4, + CHECK_RANGE_EQ (0, N, 5)); + + TEST_FUNC (f6, 5, -1, 0, 3, + CHECK_RANGE_EQ (0, N, 8)); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c new file mode 100644 index 000000000000..e2290e9161cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_4.c @@ -0,0 +1,132 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 --param aarch64-autovec-preference=asimd-only" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdio.h> + +#define N 640 +#ifndef TYPE +#define TYPE float +#endif +#ifndef FMT +#define FMT ".6f" +#endif + + +TYPE a[N] = {0}; +TYPE b[N] = {0}; + +char *curr_test; + +/* Macro to define a function with a specific comparison */ +#define DEFINE_TEST_FUNC(NAME, OP) \ + __attribute__((noipa)) \ + void NAME(void) { \ + for (int i = 0; i < N; i++) { \ + b[i] += a[i]; \ + if (a[i] OP 0) \ + break; \ + } \ + } + +/* Generate the six comparisons functions using the macro. */ +DEFINE_TEST_FUNC(f1, >) +DEFINE_TEST_FUNC(f2, >=) +DEFINE_TEST_FUNC(f3, ==) +DEFINE_TEST_FUNC(f4, !=) +DEFINE_TEST_FUNC(f5, <) +DEFINE_TEST_FUNC(f6, <=) + +__attribute__((noreturn)) +static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected) +{ + printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n", + m, __FILE__, __LINE__, curr_test, expected, result, i); + __builtin_abort (); +} + +/* Array setup macro. 
*/ +#define RESET_ARRAYS(_aval, _idx, _force, _bval) \ + do { \ + _Pragma("GCC novector") \ + for (int i = 0; i < N; ++i) { \ + a[i] = _aval; \ + b[i] = _bval; \ + } \ + if (_idx >= 0 && _idx < N) \ + a[_idx] = _force; \ + } while (0) + +/* Value check macros. */ +#define CHECK_EQ(_i, _val) \ + do { \ + if (b[_i] != _val) \ + __abort_trace ("single", _i, b[_i], _val); \ + } while (0) + +#define CHECK_RANGE_EQ(_start, _end, _val) \ + do { \ + _Pragma("GCC novector") \ + for (int i = _start; i < _end; ++i) \ + if (b[i] != _val) \ + __abort_trace ("range", i, b[i], _val); \ + } while (0) + +#define str(s) #s +#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \ + do { \ + curr_test = str (_func); \ + RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \ + _func(); \ + _check_stmt; \ + } while (0) + +int main(void) { + /* Break on random intervals. */ + TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10)); + TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10)); + TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0)); + TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1)); + TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5)); + TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7)); + + /* Break on last iteration. */ + TEST_FUNC (f1, 0, N-1, 1, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2)); + + TEST_FUNC (f2, -5, N-1, 0, 9, + CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9)); + + TEST_FUNC (f3, 2, N-1, 0, 0, + CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0)); + + TEST_FUNC (f4, 0, N-1, 2, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f5, 2, N-1, -3, 6, + CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f6, 5, N-1, 0, 7, + CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7)); + + /* Condition never met — full loop executes. 
*/ + TEST_FUNC (f1, 0, -1, 0, 2, + CHECK_RANGE_EQ (0, N, 2)); + + TEST_FUNC (f2, -2, -1, 0, 5, + CHECK_RANGE_EQ (0, N, 3)); + + TEST_FUNC (f3, 1, -1, 0, 0, + CHECK_RANGE_EQ (0, N, 1)); + + TEST_FUNC (f4, 0, -1, 0, 7, + CHECK_RANGE_EQ (0, N, 7)); + + TEST_FUNC (f5, 1, -1, 0, 4, + CHECK_RANGE_EQ (0, N, 5)); + + TEST_FUNC (f6, 5, -1, 0, 3, + CHECK_RANGE_EQ (0, N, 8)); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c new file mode 100644 index 000000000000..eb2295f8ea93 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_5.c @@ -0,0 +1,132 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O3 --param aarch64-autovec-preference=sve-only" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdio.h> + +#define N 640 +#ifndef TYPE +#define TYPE float +#endif +#ifndef FMT +#define FMT ".6f" +#endif + + +TYPE a[N] = {0}; +TYPE b[N] = {0}; + +char *curr_test; + +/* Macro to define a function with a specific comparison */ +#define DEFINE_TEST_FUNC(NAME, OP) \ + __attribute__((noipa)) \ + void NAME(void) { \ + for (int i = 0; i < N; i++) { \ + b[i] += a[i]; \ + if (a[i] OP 0) \ + break; \ + } \ + } + +/* Generate the six comparisons functions using the macro. */ +DEFINE_TEST_FUNC(f1, >) +DEFINE_TEST_FUNC(f2, >=) +DEFINE_TEST_FUNC(f3, ==) +DEFINE_TEST_FUNC(f4, !=) +DEFINE_TEST_FUNC(f5, <) +DEFINE_TEST_FUNC(f6, <=) + +__attribute__((noreturn)) +static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected) +{ + printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n", + m, __FILE__, __LINE__, curr_test, expected, result, i); + __builtin_abort (); +} + +/* Array setup macro. 
*/ +#define RESET_ARRAYS(_aval, _idx, _force, _bval) \ + do { \ + _Pragma("GCC novector") \ + for (int i = 0; i < N; ++i) { \ + a[i] = _aval; \ + b[i] = _bval; \ + } \ + if (_idx >= 0 && _idx < N) \ + a[_idx] = _force; \ + } while (0) + +/* Value check macros. */ +#define CHECK_EQ(_i, _val) \ + do { \ + if (b[_i] != _val) \ + __abort_trace ("single", _i, b[_i], _val); \ + } while (0) + +#define CHECK_RANGE_EQ(_start, _end, _val) \ + do { \ + _Pragma("GCC novector") \ + for (int i = _start; i < _end; ++i) \ + if (b[i] != _val) \ + __abort_trace ("range", i, b[i], _val); \ + } while (0) + +#define str(s) #s +#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \ + do { \ + curr_test = str (_func); \ + RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \ + _func(); \ + _check_stmt; \ + } while (0) + +int main(void) { + /* Break on random intervals. */ + TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10)); + TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10)); + TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0)); + TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1)); + TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5)); + TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7)); + + /* Break on last iteration. */ + TEST_FUNC (f1, 0, N-1, 1, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2)); + + TEST_FUNC (f2, -5, N-1, 0, 9, + CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9)); + + TEST_FUNC (f3, 2, N-1, 0, 0, + CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0)); + + TEST_FUNC (f4, 0, N-1, 2, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f5, 2, N-1, -3, 6, + CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f6, 5, N-1, 0, 7, + CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7)); + + /* Condition never met — full loop executes. 
*/ + TEST_FUNC (f1, 0, -1, 0, 2, + CHECK_RANGE_EQ (0, N, 2)); + + TEST_FUNC (f2, -2, -1, 0, 5, + CHECK_RANGE_EQ (0, N, 3)); + + TEST_FUNC (f3, 1, -1, 0, 0, + CHECK_RANGE_EQ (0, N, 1)); + + TEST_FUNC (f4, 0, -1, 0, 7, + CHECK_RANGE_EQ (0, N, 7)); + + TEST_FUNC (f5, 1, -1, 0, 4, + CHECK_RANGE_EQ (0, N, 5)); + + TEST_FUNC (f6, 5, -1, 0, 3, + CHECK_RANGE_EQ (0, N, 8)); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c new file mode 100644 index 000000000000..5a6135715d20 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_7.c @@ -0,0 +1,147 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdio.h> +#include <math.h> + +#define N 640 +#ifndef TYPE +#define TYPE float +#endif +#ifndef FMT +#define FMT ".6f" +#endif + +TYPE a[N] = {0}; +TYPE b[N] = {0}; + +char *curr_test; + +/* Macro to define a function with a specific comparison */ +#define DEFINE_TEST_FUNC(NAME, OP) \ + __attribute__((noipa)) \ + void NAME(void) { \ + for (int i = 0; i < N; i++) { \ + b[i] += a[i]; \ + if (a[i] OP 0) \ + break; \ + } \ + } + +/* Generate comparison functions */ +DEFINE_TEST_FUNC(f1, >) +DEFINE_TEST_FUNC(f2, >=) +DEFINE_TEST_FUNC(f3, ==) +DEFINE_TEST_FUNC(f4, !=) +DEFINE_TEST_FUNC(f5, <) +DEFINE_TEST_FUNC(f6, <=) + +/* Example unordered-sensitive loop: breaks if a[i] is unordered with 0 */ +__attribute__((noipa)) +void f7(void) { + for (int i = 0; i < N; i++) { + b[i] += a[i]; + if (__builtin_isunordered(a[i], 0.0f)) + break; + } +} + +__attribute__((noreturn)) +static inline void __abort_trace(const char *m, int i, TYPE result, TYPE expected) { + printf("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n", + m, __FILE__, __LINE__, curr_test, expected, result, i); + __builtin_abort(); 
+} + +/* Array setup */ +#define RESET_ARRAYS(_aval, _idx, _force, _bval) \ + do { \ + _Pragma("GCC novector") \ + for (int i = 0; i < N; ++i) { \ + a[i] = _aval; \ + b[i] = _bval; \ + } \ + if (_idx >= 0 && _idx < N) \ + a[_idx] = _force; \ + } while (0) + +/* Floating-point comparison macros: a NaN expectation requires a NaN result, otherwise the values must compare equal. */ +#define CHECK_EQ(_i, _val) do { \ + if (__builtin_isnan (_val) ? !__builtin_isnan (b[_i]) \ + : b[_i] != _val) \ + __abort_trace ("single", _i, b[_i], _val); \ +} while (0) + +#define CHECK_RANGE_EQ(_start, _end, _val) do { \ + _Pragma("GCC novector") \ + for (int i = _start; i < _end; ++i) \ + if (__builtin_isnan (_val) ? !__builtin_isnan (b[i]) \ + : b[i] != _val) \ + __abort_trace ("range", i, b[i], _val); \ +} while (0) + +#define str(s) #s +#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \ + do { \ + curr_test = str (_func); \ + RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \ + _func(); \ + _check_stmt; \ + } while (0) + +int main(void) { + /* Break on random intervals. */ + TEST_FUNC(f1, 1.0f, 0, 1.0f, 10.0f, CHECK_EQ(0, 11.0f); CHECK_EQ(1, 10.0f)); + TEST_FUNC(f2, -1.0f, 5, 0.0f, 10.0f, CHECK_EQ(0, 9.0f); CHECK_EQ(5, 10.0f)); + TEST_FUNC(f3, 3.0f, 3, 0.0f, 0.0f, CHECK_EQ(0, 3.0f); CHECK_EQ(3, 0.0f)); + TEST_FUNC(f4, 0.0f, 4, 1.0f, 1.0f, CHECK_EQ(4, 2.0f); CHECK_EQ(5, 1.0f)); + TEST_FUNC(f5, 1.0f, 6, -1.0f, 5.0f, CHECK_EQ(6, 4.0f); CHECK_EQ(7, 5.0f)); + TEST_FUNC(f6, 2.0f, 10, 0.0f, 7.0f, CHECK_EQ(10, 7.0f); CHECK_EQ(11, 7.0f)); + + /* Break on last iteration. 
*/ + TEST_FUNC(f1, 0.0f, N - 1, 1.0f, 1.0f, + CHECK_RANGE_EQ(0, N - 1, 1.0f); CHECK_EQ(N - 1, 2.0f)); + + TEST_FUNC(f2, -5.0f, N - 1, 0.0f, 9.0f, + CHECK_RANGE_EQ(0, N - 1, 4.0f); CHECK_EQ(N - 1, 9.0f)); + + TEST_FUNC(f3, 2.0f, N - 1, 0.0f, 0.0f, + CHECK_RANGE_EQ(0, N - 1, 2.0f); CHECK_EQ(N - 1, 0.0f)); + + TEST_FUNC(f4, 0.0f, N - 1, 2.0f, 1.0f, + CHECK_RANGE_EQ(0, N - 1, 1.0f); CHECK_EQ(N - 1, 3.0f)); + + TEST_FUNC(f5, 2.0f, N - 1, -3.0f, 6.0f, + CHECK_RANGE_EQ(0, N - 1, 8.0f); CHECK_EQ(N - 1, 3.0f)); + + TEST_FUNC(f6, 5.0f, N - 1, 0.0f, 7.0f, + CHECK_RANGE_EQ(0, N - 1, 12.0f); CHECK_EQ(N - 1, 7.0f)); + + /* Condition never met — full loop executes. */ + TEST_FUNC(f1, 0.0f, -1, 0.0f, 2.0f, + CHECK_RANGE_EQ(0, N, 2.0f)); + + TEST_FUNC(f2, -2.0f, -1, 0.0f, 5.0f, + CHECK_RANGE_EQ(0, N, 3.0f)); + + TEST_FUNC(f3, 1.0f, -1, 0.0f, 0.0f, + CHECK_RANGE_EQ(0, N, 1.0f)); + + TEST_FUNC(f4, 0.0f, -1, 0.0f, 7.0f, + CHECK_RANGE_EQ(0, N, 7.0f)); + + TEST_FUNC(f5, 1.0f, -1, 0.0f, 4.0f, + CHECK_RANGE_EQ(0, N, 5.0f)); + + TEST_FUNC(f6, 5.0f, -1, 0.0f, 3.0f, + CHECK_RANGE_EQ(0, N, 8.0f)); + +#if !defined(__FAST_MATH__) + /* Unordered break (NAN in a[i]) */ + TEST_FUNC(f7, 1.0f, 123, NAN, 2.0f, + CHECK_RANGE_EQ(0, 123, 3.0f); CHECK_EQ(123, NAN)); +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c new file mode 100644 index 000000000000..3dd7a60225e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/vect-early-break-cbranch_8.c @@ -0,0 +1,132 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdio.h> + +#define N 640 +#ifndef TYPE +#define TYPE float +#endif +#ifndef FMT +#define FMT ".6f" +#endif + + +TYPE a[N] = {0}; +TYPE b[N] = {0}; + +char *curr_test; + +/* Macro to define a function with a specific comparison */ +#define 
DEFINE_TEST_FUNC(NAME, OP) \ + __attribute__((noipa)) \ + void NAME(void) { \ + for (int i = 0; i < N; i++) { \ + b[i] += a[i]; \ + if (a[i] OP 0) \ + break; \ + } \ + } + +/* Generate the six comparisons functions using the macro. */ +DEFINE_TEST_FUNC(f1, >) +DEFINE_TEST_FUNC(f2, >=) +DEFINE_TEST_FUNC(f3, ==) +DEFINE_TEST_FUNC(f4, !=) +DEFINE_TEST_FUNC(f5, <) +DEFINE_TEST_FUNC(f6, <=) + +__attribute__((noreturn)) +static inline void __abort_trace (const char *m, int i, TYPE result, TYPE expected) +{ + printf ("*** [%s] FAIL AT %s:%d in %s - expected %" FMT " but got %" FMT " at pos %d\n", + m, __FILE__, __LINE__, curr_test, expected, result, i); + __builtin_abort (); +} + +/* Array setup macro. */ +#define RESET_ARRAYS(_aval, _idx, _force, _bval) \ + do { \ + _Pragma("GCC novector") \ + for (int i = 0; i < N; ++i) { \ + a[i] = _aval; \ + b[i] = _bval; \ + } \ + if (_idx >= 0 && _idx < N) \ + a[_idx] = _force; \ + } while (0) + +/* Value check macros. */ +#define CHECK_EQ(_i, _val) \ + do { \ + if (b[_i] != _val) \ + __abort_trace ("single", _i, b[_i], _val); \ + } while (0) + +#define CHECK_RANGE_EQ(_start, _end, _val) \ + do { \ + _Pragma("GCC novector") \ + for (int i = _start; i < _end; ++i) \ + if (b[i] != _val) \ + __abort_trace ("range", i, b[i], _val); \ + } while (0) + +#define str(s) #s +#define TEST_FUNC(_func, _aval, _idx, _force, _bval, _check_stmt) \ + do { \ + curr_test = str (_func); \ + RESET_ARRAYS((_aval), (_idx), (_force), (_bval)); \ + _func(); \ + _check_stmt; \ + } while (0) + +int main(void) { + /* Break on random intervals. 
*/ + TEST_FUNC (f1, 1, 0, 1, 10, CHECK_EQ (0, 11); CHECK_EQ (1, 10)); + TEST_FUNC (f2, -1, 5, 0, 10, CHECK_EQ (0, 9); CHECK_EQ (5, 10)); + TEST_FUNC (f3, 3, 3, 0, 0, CHECK_EQ (0, 3); CHECK_EQ (3, 0)); + TEST_FUNC (f4, 0, 4, 1, 1, CHECK_EQ (4, 2); CHECK_EQ (5, 1)); + TEST_FUNC (f5, 1, 6, -1, 5, CHECK_EQ (6, 4); CHECK_EQ (7, 5)); + TEST_FUNC (f6, 2, 10, 0, 7, CHECK_EQ (10, 7); CHECK_EQ (11, 7)); + + /* Break on last iteration. */ + TEST_FUNC (f1, 0, N-1, 1, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 2)); + + TEST_FUNC (f2, -5, N-1, 0, 9, + CHECK_RANGE_EQ (0, N-1, 4); CHECK_EQ (N-1, 9)); + + TEST_FUNC (f3, 2, N-1, 0, 0, + CHECK_RANGE_EQ(0, N-1, 2); CHECK_EQ (N-1, 0)); + + TEST_FUNC (f4, 0, N-1, 2, 1, + CHECK_RANGE_EQ (0, N-1, 1); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f5, 2, N-1, -3, 6, + CHECK_RANGE_EQ (0, N-1, 8); CHECK_EQ (N-1, 3)); + + TEST_FUNC (f6, 5, N-1, 0, 7, + CHECK_RANGE_EQ (0, N-1, 12); CHECK_EQ (N-1, 7)); + + /* Condition never met — full loop executes. */ + TEST_FUNC (f1, 0, -1, 0, 2, + CHECK_RANGE_EQ (0, N, 2)); + + TEST_FUNC (f2, -2, -1, 0, 5, + CHECK_RANGE_EQ (0, N, 3)); + + TEST_FUNC (f3, 1, -1, 0, 0, + CHECK_RANGE_EQ (0, N, 1)); + + TEST_FUNC (f4, 0, -1, 0, 7, + CHECK_RANGE_EQ (0, N, 7)); + + TEST_FUNC (f5, 1, -1, 0, 4, + CHECK_RANGE_EQ (0, N, 5)); + + TEST_FUNC (f6, 5, -1, 0, 3, + CHECK_RANGE_EQ (0, N, 8)); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c index 673b781eb6d0..ca4ef498485f 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch.c @@ -50,7 +50,6 @@ void f2 () /* ** f3: ** ... -** cmeq v[0-9]+.4s, v[0-9]+.4s, #0 ** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ** fmov x[0-9]+, d[0-9]+ ** cbn?z x[0-9]+, \.L[0-9]+ @@ -69,7 +68,6 @@ void f3 () /* ** f4: ** ... 
-** cmtst v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ** umaxp v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s ** fmov x[0-9]+, d[0-9]+ ** cbn?z x[0-9]+, \.L[0-9]+ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c new file mode 100644 index 000000000000..d5cb1946873f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_2.c @@ -0,0 +1,105 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+sve" + +#define N 640 +int a[N] = {0}; +int b[N] = {0}; +/* +** f1: +** ... +** cmpgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f1 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] > 0) + break; + } +} +/* +** f2: +** ... +** cmpge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f2 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] >= 0) + break; + } +} +/* +** f3: +** ... +** cmpeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f3 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] == 0) + break; + } +} +/* +** f4: +** ... +** cmpne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f4 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] != 0) + break; + } +} +/* +** f5: +** ... +** cmplt p[0-9]+.s, p7/z, z[0-9]+.s, #0 +** b(\.?eq|\.none) .L[0-9]+ +** ... +*/ +void f5 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] < 0) + break; + } +} +/* +** f6: +** ... +** cmple p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0 +** b(\.?eq|\.none) \.L[0-9]+ +** ... 
+*/ +void f6 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] <= 0) + break; + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c new file mode 100644 index 000000000000..8980b9f04f9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c @@ -0,0 +1,112 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#pragma GCC target "+sve" + +#define N 640 +float a[N] = {0}; +float b[N] = {0}; + +/* +** f1: +** ... +** fcmgt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0 +** ptest p[0-9]+, p[0-9]+\.b +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f1 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] > 0) + break; + } +} +/* +** f2: +** ... +** fcmge p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0 +** ptest p[0-9]+, p[0-9]+\.b +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f2 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] >= 0) + break; + } +} +/* +** f3: +** ... +** fcmeq p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0 +** ptest p[0-9]+, p[0-9]+\.b +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f3 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] == 0) + break; + } +} +/* +** f4: +** ... +** fcmne p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0 +** ptest p[0-9]+, p[0-9]+\.b +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f4 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] != 0) + break; + } +} +/* +** f5: +** ... +** fcmlt p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0 +** ptest p[0-9]+, p[0-9]+\.b +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f5 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] < 0) + break; + } +} +/* +** f6: +** ... 
+** fcmle p[0-9]+.s, p[0-9]+/z, z[0-9]+.s, #0.0 +** ptest p[0-9]+, p[0-9]+\.b +** b(\.?eq|\.none) \.L[0-9]+ +** ... +*/ +void f6 () +{ + for (int i = 0; i < N; i++) + { + b[i] += a[i]; + if (a[i] <= 0) + break; + } +}
