SVE loads/stores using predicates that select the bottom 8, 16, 32, 64, or
128 bits of a register can be folded to ASIMD LDR/STR, thus avoiding the
predicate.  For example,

svuint8_t foo (uint8_t *x)
{
  return svld1 (svwhilelt_b8 (0, 16), x);
}

was previously compiled to:

foo:
	ptrue	p3.b, vl16
	ld1b	z0.b, p3/z, [x0]
	ret

and is now compiled to:

foo:
	ldr	q0, [x0]
	ret

The optimization is applied during the expand pass and was implemented by
making the following changes to maskload<mode><vpred> and
maskstore<mode><vpred>:
- The existing define_insns were renamed and new define_expands for
  maskload and maskstore were added with an operand predicate for the
  governing SVE predicate that accepts both register operands and
  constant-vector operands.
- If the SVE predicate is a constant vector and contains a pattern as
  described above, an ASIMD load/store is emitted instead of the SVE
  load/store.

The patch implements the optimization for LD1 and ST1, for 8-bit, 16-bit,
32-bit, 64-bit, and 128-bit moves, for all full SVE data vector modes.
Note that VNx8HFmode and VNx8BFmode with a VL2 pattern were excluded,
because there are no move patterns for V2HFmode and V2BFmode (yet).
Follow-up patches for LD2/3/4 and ST2/3/4 and potentially partial SVE
vector modes are planned.

The patch was bootstrapped and tested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>

gcc/
	PR target/117978
	* config/aarch64/aarch64-protos.h: Declare
	aarch64_simd_container_mode, aarch64_sve_full_data_mode_p,
	aarch64_count_pred_pat_128, aarch64_emit_load_store_through_mode.
	* config/aarch64/aarch64-sve.md (maskload<mode><vpred>): New
	define_expand folding maskloads with certain predicate patterns
	to ASIMD loads.
	(*aarch64_maskload<mode><vpred>): Renamed from
	maskload<mode><vpred>.
	(maskstore<mode><vpred>): New define_expand folding maskstores
	with certain predicate patterns to ASIMD stores.
	(*aarch64_maskstore<mode><vpred>): Renamed from
	maskstore<mode><vpred>.
	* config/aarch64/aarch64.cc (aarch64_sve_full_data_mode_p): New
	function returning true if a given mode is a full SVE data
	vector mode.
	(aarch64_emit_load_store_through_mode): New function emitting a
	load/store through subregs of a given mode.
	(aarch64_emit_sve_pred_move): Refactor to use
	aarch64_emit_load_store_through_mode.
	(aarch64_v8_mode): New function returning an 8-bit mode.
	(aarch64_v16_mode): New function returning a 16-bit mode.
	(aarch64_v32_mode): New function returning a 32-bit mode.
	(aarch64_simd_container_mode): Make public and extend to find
	8-bit, 16-bit, and 32-bit container modes.
	(aarch64_count_pred_pat_128): New function to find SVE
	predicates with VL1, VL2, VL4, VL8, or VL16 patterns.
	* config/aarch64/iterators.md (elem_bits): Extend to cover
	partial SVE vector modes.
	* config/aarch64/predicates.md (aarch64_sve_reg_or_const_pred):
	New predicate matching register operands or constant-vector
	operands.

gcc/testsuite/
	PR target/117978
	* gcc.target/aarch64/sve/acle/general/whilelt_5.c: Adjust
	expected outcome.
	* gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c: New test.
	* gcc.target/aarch64/sve/while_7.c: Adjust expected outcome.
	* gcc.target/aarch64/sve/while_9.c: Adjust expected outcome.
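For illustration, the store direction folds in the same way on little-endian
targets.  The function name below is chosen for exposition only and register
allocation may differ; the expected output mirrors the new
ldst_ptrue_pat_128_to_neon.c test:

void store_vl4 (uint16_t *x, svuint16_t data)
{
  svst1 (svwhilelt_b16 (0, 4), x, data);
}

was previously compiled to:

store_vl4:
	ptrue	p3.h, vl4
	st1h	z0.h, p3, [x0]
	ret

and is now compiled to:

store_vl4:
	str	d0, [x0]
	ret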
---
 gcc/config/aarch64/aarch64-protos.h           |   4 +
 gcc/config/aarch64/aarch64-sve.md             |  62 ++++++++-
 gcc/config/aarch64/aarch64.cc                 | 128 +++++++++++++++---
 gcc/config/aarch64/iterators.md               |  19 ++-
 gcc/config/aarch64/predicates.md              |   4 +
 .../aarch64/sve/acle/general/whilelt_5.c      |  24 +++-
 .../aarch64/sve/ldst_ptrue_pat_128_to_neon.c  |  81 +++++++++++
 .../gcc.target/aarch64/sve/while_7.c          |   4 +-
 .../gcc.target/aarch64/sve/while_9.c          |   2 +-
 9 files changed, 296 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1ca86c9d175..a03f091fe3a 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -857,6 +857,7 @@ enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
 bool aarch64_advsimd_struct_mode_p (machine_mode mode);
 opt_machine_mode aarch64_v64_mode (scalar_mode);
 opt_machine_mode aarch64_v128_mode (scalar_mode);
+machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
 opt_machine_mode aarch64_full_sve_mode (scalar_mode);
 bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
 bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
@@ -903,8 +904,10 @@ opt_machine_mode aarch64_advsimd_vector_array_mode (machine_mode,
						    unsigned HOST_WIDE_INT);
 opt_machine_mode aarch64_sve_data_mode (scalar_mode, poly_uint64);
 bool aarch64_sve_mode_p (machine_mode);
+bool aarch64_sve_full_data_mode_p (machine_mode);
 HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int);
 bool aarch64_sve_cnt_immediate_p (rtx);
+int aarch64_count_pred_pat_128 (rtx, machine_mode);
 bool aarch64_sve_scalar_inc_dec_immediate_p (rtx);
 bool aarch64_sve_rdvl_immediate_p (rtx);
 bool aarch64_sve_addvl_addpl_immediate_p (rtx);
@@ -1026,6 +1029,7 @@ rtx aarch64_ptrue_reg (machine_mode, unsigned int);
 rtx aarch64_ptrue_reg (machine_mode, machine_mode);
 rtx aarch64_pfalse_reg (machine_mode);
 bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
+void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
 void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
 bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index d4af3706294..d9392e3611a 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1286,7 +1286,36 @@
 ;; -------------------------------------------------------------------------
 
 ;; Predicated LD1 (single).
-(define_insn "maskload<mode><vpred>"
+(define_expand "maskload<mode><vpred>"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+	(unspec:SVE_ALL
+	  [(match_operand:<VPRED> 2 "aarch64_sve_reg_or_const_pred")
+	   (match_operand:SVE_ALL 1 "memory_operand")
+	   (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")]
+	  UNSPEC_LD1_SVE))]
+  "TARGET_SVE"
+  {
+    int pat_cnt = aarch64_count_pred_pat_128 (operands[2], <MODE>mode);
+    int width = <elem_bits> * pat_cnt;
+    if (aarch64_sve_full_data_mode_p (<MODE>mode)
+	&& pat_cnt && (pat_cnt == 1 || !BYTES_BIG_ENDIAN)
+	&& known_le (width, 128))
+      {
+	machine_mode mode = aarch64_simd_container_mode (<VEL>mode, width);
+	if (mode != VOIDmode)
+	  {
+	    aarch64_emit_load_store_through_mode (operands[0],
+						  operands[1], mode);
+	    DONE;
+	  }
+      }
+    if (!REG_P (operands[2]))
+      operands[2] = force_reg (<VPRED>mode, operands[2]);
+  }
+)
+
+;; Predicated LD1 (single).
+(define_insn "*aarch64_maskload<mode><vpred>" [(set (match_operand:SVE_ALL 0 "register_operand" "=w") (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") @@ -2287,7 +2316,36 @@ ;; ------------------------------------------------------------------------- ;; Predicated ST1 (single). -(define_insn "maskstore<mode><vpred>" +(define_expand "maskstore<mode><vpred>" + [(set (match_operand:SVE_ALL 0 "memory_operand") + (unspec:SVE_ALL + [(match_operand:<VPRED> 2 "aarch64_sve_reg_or_const_pred") + (match_operand:SVE_ALL 1 "register_operand") + (match_dup 0)] + UNSPEC_ST1_SVE))] + "TARGET_SVE" + { + int pat_cnt = aarch64_count_pred_pat_128 (operands[2], <MODE>mode); + int width = <elem_bits> * pat_cnt; + if (aarch64_sve_full_data_mode_p (<MODE>mode) + && pat_cnt && (pat_cnt == 1 || !BYTES_BIG_ENDIAN) + && known_le (width, 128)) + { + machine_mode mode = aarch64_simd_container_mode (<VEL>mode, width); + if (mode != VOIDmode) + { + aarch64_emit_load_store_through_mode (operands[0], + operands[1], mode); + DONE; + } + } + if (!REG_P (operands[2])) + operands[2] = force_reg (<VPRED>mode, operands[2]); + } +) + +;; Predicated ST1 (single). +(define_insn "*aarch64_maskstore<mode><vpred>" [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fff8d9da49d..720c00980d8 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -352,7 +352,6 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, int misalignment, bool is_packed); -static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, aarch64_addr_query_type); @@ -1765,6 +1764,14 @@ aarch64_sve_data_mode_p (machine_mode mode) return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA; } +/* Return true if MODE is an SVE data vector mode, but not a partial mode; + either a single vector or a structure of vectors. */ +bool +aarch64_sve_full_data_mode_p (machine_mode mode) +{ + return aarch64_classify_vector_mode (mode) == VEC_SVE_DATA; +} + /* Return the number of defined bytes in one constituent vector of SVE mode MODE, which has vector flags VEC_FLAGS. */ static poly_int64 @@ -6410,8 +6417,27 @@ aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl, return gen_rtx_MEM (mode, force_reg (Pmode, addr)); } -/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate - that is known to contain PTRUE. */ +/* Emit a load/store from a subreg of SRC to a subreg of DEST. + The subregs have mode NEW_MODE. Use only for reg<->mem moves. */ +void +aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode) +{ + gcc_assert ((REG_P (src) && MEM_P (dest)) + || (REG_P (dest) && MEM_P (src))); + machine_mode mode = GET_MODE (dest); + if (MEM_P (src)) + { + rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0)); + emit_move_insn (dest, lowpart_subreg (mode, tmp, new_mode)); + } + else + emit_move_insn (adjust_address (dest, new_mode, 0), + force_lowpart_subreg (new_mode, src, mode)); +} + +/* PRED is a predicate that is known to contain PTRUE. + For 128-bit VLS loads/stores, emit LDR/STR. + Else, emit an SVE predicated move from SRC to DEST. 
 void
 aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
@@ -6421,16 +6447,7 @@
       && known_eq (GET_MODE_SIZE (mode), 16)
       && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
       && !BYTES_BIG_ENDIAN)
-    {
-      if (MEM_P (src))
-	{
-	  rtx tmp = force_reg (V16QImode, adjust_address (src, V16QImode, 0));
-	  emit_move_insn (dest, lowpart_subreg (mode, tmp, V16QImode));
-	}
-      else
-	emit_move_insn (adjust_address (dest, V16QImode, 0),
-			force_lowpart_subreg (V16QImode, src, mode));
-    }
+    aarch64_emit_load_store_through_mode (dest, src, V16QImode);
   else
     {
       expand_operand ops[3];
@@ -22519,6 +22536,57 @@ aarch64_full_sve_mode (scalar_mode mode)
     }
 }
 
+/* Return the 8-bit mode for element mode MODE, if it exists.  */
+opt_machine_mode
+aarch64_v8_mode (scalar_mode mode)
+{
+  switch (mode)
+    {
+    case E_QImode:
+      return E_QImode;
+    default:
+      return {};
+    }
+}
+
+/* Return the 16-bit mode for element mode MODE, if it exists.  */
+opt_machine_mode
+aarch64_v16_mode (scalar_mode mode)
+{
+  switch (mode)
+    {
+    case E_HFmode:
+      return E_HFmode;
+    case E_BFmode:
+      return E_BFmode;
+    case E_HImode:
+      return E_HImode;
+    case E_QImode:
+      return E_HImode;
+    default:
+      return {};
+    }
+}
+
+/* Return the 32-bit mode for element mode MODE, if it exists.  */
+opt_machine_mode
+aarch64_v32_mode (scalar_mode mode)
+{
+  switch (mode)
+    {
+    case E_SFmode:
+      return E_SFmode;
+    case E_SImode:
+      return E_SImode;
+    case E_HImode:
+      return E_SImode;
+    case E_QImode:
+      return E_SImode;
+    default:
+      return {};
+    }
+}
+
 /* Return the 64-bit Advanced SIMD vector mode for element mode MODE,
    if it exists.  */
 opt_machine_mode
@@ -22573,7 +22641,7 @@ aarch64_v128_mode (scalar_mode mode)
 
 /* Return appropriate SIMD container for MODE within a vector of
    WIDTH bits.  */
-static machine_mode
+machine_mode
 aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
 {
   if (TARGET_SVE
@@ -22581,13 +22649,21 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
       && known_eq (width, BITS_PER_SVE_VECTOR))
     return aarch64_full_sve_mode (mode).else_mode (word_mode);
 
-  gcc_assert (known_eq (width, 64) || known_eq (width, 128));
+  gcc_assert (known_eq (width, 64) || known_eq (width, 128)
+	      || known_eq (width, 32) || known_eq (width, 16)
+	      || known_eq (width, 8));
   if (TARGET_BASE_SIMD)
     {
      if (known_eq (width, 128))
	return aarch64_v128_mode (mode).else_mode (word_mode);
-      else
+      else if (known_eq (width, 64))
	return aarch64_v64_mode (mode).else_mode (word_mode);
+      else if (known_eq (width, 32))
+	return aarch64_v32_mode (mode).else_mode (VOIDmode);
+      else if (known_eq (width, 16))
+	return aarch64_v16_mode (mode).else_mode (VOIDmode);
+      else
+	return aarch64_v8_mode (mode).else_mode (VOIDmode);
     }
   return word_mode;
 }
@@ -23526,6 +23602,26 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
   return false;
 }
 
+/* If PRED is a patterned SVE PTRUE predicate with patterns
+   VL1, VL2, VL4, VL8, or VL16, return the number of active lanes
+   for the mode MODE.  Else return 0.  */
+int
+aarch64_count_pred_pat_128 (rtx pred, machine_mode mode)
+{
+  struct simd_immediate_info info;
+  bool is_valid;
+  is_valid = aarch64_simd_valid_imm (pred, &info, AARCH64_CHECK_MOV);
+  if (!is_valid || info.insn != simd_immediate_info::PTRUE)
+    return 0;
+  aarch64_svpattern pattern = info.u.pattern;
+  unsigned int cnt
+    = aarch64_fold_sve_cnt_pat (pattern, 128 / GET_MODE_UNIT_BITSIZE (mode));
+  if (pattern <= AARCH64_SV_VL16 && pow2p_hwi (cnt))
+    return cnt;
+  else
+    return 0;
+}
+
 /* Return true if OP is a valid SIMD move immediate for SVE or AdvSIMD.  */
 bool
 aarch64_simd_valid_mov_imm (rtx op)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 146453b0516..e7db193383b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1384,12 +1384,19 @@
 ;; element.
 (define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16")
			     (VNx4BI "32") (VNx2BI "64")
-			     (VNx16QI "8") (VNx32QI "8") (VNx64QI "8")
-			     (VNx8HI "16") (VNx16HI "16") (VNx32HI "16")
-			     (VNx8HF "16") (VNx16HF "16") (VNx32HF "16")
-			     (VNx8BF "16") (VNx16BF "16") (VNx32BF "16")
-			     (VNx4SI "32") (VNx8SI "32") (VNx16SI "32")
-			     (VNx4SF "32") (VNx8SF "32") (VNx16SF "32")
+			     (VNx2QI "8") (VNx4QI "8")
+			     (VNx8QI "8") (VNx16QI "8")
+			     (VNx32QI "8") (VNx64QI "8")
+			     (VNx2HI "16") (VNx4HI "16") (VNx8HI "16")
+			     (VNx16HI "16") (VNx32HI "16")
+			     (VNx2HF "16") (VNx4HF "16") (VNx8HF "16")
+			     (VNx16HF "16") (VNx32HF "16")
+			     (VNx2BF "16") (VNx4BF "16") (VNx8BF "16")
+			     (VNx16BF "16") (VNx32BF "16")
+			     (VNx2SI "32") (VNx4SI "32")
+			     (VNx8SI "32") (VNx16SI "32")
+			     (VNx2SF "32") (VNx4SF "32")
+			     (VNx8SF "32") (VNx16SF "32")
			     (VNx2DI "64") (VNx4DI "64") (VNx8DI "64")
			     (VNx2DF "64") (VNx4DF "64") (VNx8DF "64")
			     (VNx1TI "128")])
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 1ab1c696c62..1eeda58a1e5 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -813,6 +813,10 @@
   (and (match_code "const")
        (match_test "aarch64_sve_ptrue_svpattern_p (op, NULL)")))
 
+(define_predicate "aarch64_sve_reg_or_const_pred"
+  (ior (match_operand 0 "register_operand")
+       (match_code "const_vector")))
+
 (define_predicate "aarch64_sve_arith_immediate"
   (and (match_code "const,const_vector")
        (match_test "aarch64_sve_arith_immediate_p (mode, op, false)")))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c
index f06a74aa2da..05e266aad7d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c
@@ -11,8 +11,7 @@ extern "C" {
 
 /*
 ** load_vl1:
-**	ptrue	(p[0-7])\.[bhsd], vl1
-**	ld1h	z0\.h, \1/z, \[x0\]
+**	ldr	h0, \[x0\]
 **	ret
 */
 svint16_t
@@ -22,7 +21,12 @@ load_vl1 (int16_t *ptr)
 }
 
 /*
-** load_vl2:
+** load_vl2: { target aarch64_little_endian }
+**	ldr	s0, \[x0\]
+**	ret
+*/
+/*
+** load_vl2: { target aarch64_big_endian }
 **	ptrue	(p[0-7])\.h, vl2
 **	ld1h	z0\.h, \1/z, \[x0\]
 **	ret
@@ -46,7 +50,12 @@ load_vl3 (int16_t *ptr)
 }
 
 /*
-** load_vl4:
+** load_vl4: { target aarch64_little_endian }
+**	ldr	d0, \[x0\]
+**	ret
+*/
+/*
+** load_vl4: { target aarch64_big_endian }
 **	ptrue	(p[0-7])\.h, vl4
 **	ld1h	z0\.h, \1/z, \[x0\]
 **	ret
@@ -94,7 +103,12 @@ load_vl7 (int16_t *ptr)
 }
 
 /*
-** load_vl8:
+** load_vl8: { target aarch64_little_endian }
+**	ldr	q0, \[x0\]
+**	ret
+*/
+/*
+** load_vl8: { target aarch64_big_endian }
 **	ptrue	(p[0-7])\.h, vl8
 **	ld1h	z0\.h, \1/z, \[x0\]
 **	ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c b/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c
new file mode 100644
index 00000000000..855514a34e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#include <arm_sve.h>
+
+#define TEST(TYPE, TY, W, B)						\
+  sv##TYPE								\
+  ld1_##TY##W##B##_1 (TYPE *x)						\
+  {									\
+    svbool_t pg = svwhilelt_b##B (0, W);				\
+    return svld1_##TY##B (pg, x);					\
+  }									\
+  sv##TYPE								\
+  ld1_##TY##W##B##_2 (TYPE *x)						\
+  {									\
+    svbool_t pg = svptrue_pat_b##B ((enum svpattern) (W > 8 ? 9 : W));	\
+    return svld1_##TY##B (pg, x);					\
+  }									\
+  void									\
+  st1_##TY##W##B##_1 (TYPE *x, sv##TYPE data)				\
+  {									\
+    svbool_t pg = svwhilelt_b##B (0, W);				\
+    return svst1_##TY##B (pg, x, data);					\
+  }									\
+  void									\
+  st1_##TY##W##B##_2 (TYPE *x, sv##TYPE data)				\
+  {									\
+    svbool_t pg = svptrue_pat_b##B ((enum svpattern) (W > 8 ? 9 : W));	\
+    return svst1_##TY##B (pg, x, data);					\
+  }									\
+
+#define TEST64(TYPE, TY, B)			\
+  TEST (TYPE, TY, 1, B)				\
+  TEST (TYPE, TY, 2, B)				\
+
+#define TEST32(TYPE, TY, B)			\
+  TEST64 (TYPE, TY, B)				\
+  TEST (TYPE, TY, 4, B)				\
+
+#define TEST16(TYPE, TY, B)			\
+  TEST32 (TYPE, TY, B)				\
+  TEST (TYPE, TY, 8, B)				\
+
+#define TEST8(TYPE, TY, B)			\
+  TEST16 (TYPE, TY, B)				\
+  TEST (TYPE, TY, 16, B)
+
+#define T(TYPE, TY, B)				\
+  TEST##B (TYPE, TY, B)
+
+T (bfloat16_t, bf, 16)
+T (float16_t, f, 16)
+T (float32_t, f, 32)
+T (float64_t, f, 64)
+T (int8_t, s, 8)
+T (int16_t, s, 16)
+T (int32_t, s, 32)
+T (int64_t, s, 64)
+T (uint8_t, u, 8)
+T (uint16_t, u, 16)
+T (uint32_t, u, 32)
+T (uint64_t, u, 64)
+
+/* { dg-final { scan-assembler-times {\tldr\tq0, \[x0\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tldr\td0, \[x0\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tldr\ts0, \[x0\]} 14 } } */
+/* { dg-final { scan-assembler-times {\tldr\th0, \[x0\]} 12 } } */
+/* { dg-final { scan-assembler-times {\tldr\tb0, \[x0\]} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tstr\tq0, \[x0\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tstr\td0, \[x0\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tstr\ts0, \[x0\]} 14 } } */
+/* { dg-final { scan-assembler-times {\tstr\th0, \[x0\]} 12 } } */
+/* { dg-final { scan-assembler-times {\tstr\tb0, \[x0\]} 4 } } */
+
+/* The optimization is not applied to VNx8HFmode and VNx8BFmode with a
+   VL2 predicate, because there are no move patterns defined for V2HF
+   and V2BF.  */
+/* { dg-final { scan-assembler-times {\tptrue\tp([0-7]).h, vl2\n\tld1h\tz0.h, p\1/z, \[x0\]} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp([0-7]).h, vl2\n\tst1h\tz0.h, p\1, \[x0\]} 4 } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c
index a66a20d21f6..ab2fa3646fc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c
@@ -19,7 +19,7 @@
 
 TEST_ALL (ADD_LOOP)
 
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl8\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\td[0-9]+, \[x0\]} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\tq[0-9]+, \[x0\]} 1 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c
index dd3f404ab39..99940dd73fa 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c
@@ -19,7 +19,7 @@
 
 TEST_ALL (ADD_LOOP)
 
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl16\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\tq[0-9]+\, \[x0\]} 1 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */
-- 
2.34.1