SVE loads and stores where the predicate is all-true can be optimized to unpredicated instructions. For example, svuint8_t foo (uint8_t *x) { return svld1 (svptrue_b8 (), x); } was compiled to: foo: ptrue p3.b, all ld1b z0.b, p3/z, [x0] ret but can be compiled to: foo: ldr z0, [x0] ret
Late_combine2 had already been trying to do this, but was missing the instruction: (set (reg/i:VNx16QI 32 v0) (unspec:VNx16QI [ (const_vector:VNx16BI repeat [ (const_int 1 [0x1]) ]) (mem:VNx16QI (reg/f:DI 0 x0 [orig:106 x ] [106]) [0 MEM <svuint8_t> [(unsigned char *)x_2(D)]+0 S[16, 16] A8]) ] UNSPEC_PRED_X)) This patch adds a new define_insn_and_split that matches the missing instruction and splits it to an unpredicated load/store. Because LDR offers fewer addressing modes than LD1[BHWD], the pattern is guarded under reload_completed to only apply the transform once the address modes have been chosen during RA. The patch was bootstrapped and tested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com> gcc/ * config/aarch64/aarch64-sve.md (*aarch64_sve_ptrue<mode>_ldr_str): Add define_insn_and_split to fold predicated SVE loads/stores with ptrue predicates to unpredicated instructions. gcc/testsuite/ * gcc.target/aarch64/sve/ptrue_ldr_str.c: New test. * gcc.target/aarch64/sve/cost_model_14.c: Adjust expected outcome. * gcc.target/aarch64/sve/cost_model_4.c: Adjust expected outcome. * gcc.target/aarch64/sve/cost_model_5.c: Adjust expected outcome. * gcc.target/aarch64/sve/cost_model_6.c: Adjust expected outcome. * gcc.target/aarch64/sve/cost_model_7.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_f16.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_f32.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_f64.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_mf8.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_s16.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_s32.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_s64.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_s8.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_u16.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_u32.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_u64.c: Adjust expected outcome. * gcc.target/aarch64/sve/pcs/varargs_2_u8.c: Adjust expected outcome. * gcc.target/aarch64/sve/peel_ind_2.c: Adjust expected outcome. * gcc.target/aarch64/sve/single_1.c: Adjust expected outcome. * gcc.target/aarch64/sve/single_2.c: Adjust expected outcome. * gcc.target/aarch64/sve/single_3.c: Adjust expected outcome. * gcc.target/aarch64/sve/single_4.c: Adjust expected outcome. --- gcc/config/aarch64/aarch64-sve.md | 17 ++++ .../aarch64/sve/acle/general/attributes_6.c | 8 +- .../gcc.target/aarch64/sve/cost_model_14.c | 4 +- .../gcc.target/aarch64/sve/cost_model_4.c | 3 +- .../gcc.target/aarch64/sve/cost_model_5.c | 3 +- .../gcc.target/aarch64/sve/cost_model_6.c | 3 +- .../gcc.target/aarch64/sve/cost_model_7.c | 3 +- .../aarch64/sve/pcs/varargs_2_f16.c | 93 +++++++++++++++++-- .../aarch64/sve/pcs/varargs_2_f32.c | 93 +++++++++++++++++-- .../aarch64/sve/pcs/varargs_2_f64.c | 93 +++++++++++++++++-- .../aarch64/sve/pcs/varargs_2_mf8.c | 32 +++---- .../aarch64/sve/pcs/varargs_2_s16.c | 93 +++++++++++++++++-- .../aarch64/sve/pcs/varargs_2_s32.c | 93 +++++++++++++++++-- .../aarch64/sve/pcs/varargs_2_s64.c | 93 +++++++++++++++++-- .../gcc.target/aarch64/sve/pcs/varargs_2_s8.c | 34 +++---- .../aarch64/sve/pcs/varargs_2_u16.c | 93 +++++++++++++++++-- .../aarch64/sve/pcs/varargs_2_u32.c | 93 +++++++++++++++++-- .../aarch64/sve/pcs/varargs_2_u64.c | 93 +++++++++++++++++-- .../gcc.target/aarch64/sve/pcs/varargs_2_u8.c | 32 +++---- .../gcc.target/aarch64/sve/peel_ind_2.c | 4 +- .../gcc.target/aarch64/sve/ptrue_ldr_str.c | 31 +++++++ .../gcc.target/aarch64/sve/single_1.c | 11 ++- .../gcc.target/aarch64/sve/single_2.c | 11 ++- .../gcc.target/aarch64/sve/single_3.c | 11 ++- .../gcc.target/aarch64/sve/single_4.c | 11 ++- 25 files changed, 907 insertions(+), 148 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index d4af3706294..03b7194d200 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -702,6 +702,23 @@ } ) +;; Fold predicated loads/stores with a PTRUE predicate to unpredicated +;; loads/stores after RA. +(define_insn_and_split "*aarch64_sve_ptrue<mode>_ldr_str" + [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand") + (unspec:SVE_FULL + [(match_operand:<VPRED> 1 "aarch64_simd_imm_one") + (match_operand:SVE_FULL 2 "aarch64_sve_nonimmediate_operand")] + UNSPEC_PRED_X))] + "TARGET_SVE && reload_completed + && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN) + && ((REG_P (operands[0]) && MEM_P (operands[2])) + || (REG_P (operands[2]) && MEM_P (operands[0])))" + "#" + "&& 1" + [(set (match_dup 0) + (match_dup 2))]) + ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors ;; or vectors for which little-endian ordering isn't acceptable. Memory ;; accesses require secondary reloads. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c index 907637f06f9..eeba533ae74 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c @@ -29,7 +29,7 @@ test_add (fixed_int8_t x, fixed_int8_t y) } /* -** test_add_gnu: +** test_add_gnu: {target aarch64_big_endian } ** ( ** add (z[0-9]+\.b), (?:z0\.b, z1\.b|z1\.b, z0\.b) ** ptrue (p[0-7])\.b, vl32 @@ -41,6 +41,12 @@ test_add (fixed_int8_t x, fixed_int8_t y) ** ) ** ret */ +/* +** test_add_gnu: {target aarch64_little_endian } +** add (z[0-9]+)\.b, (?:z0\.b, z1\.b|z1\.b, z0\.b) +** str \1, \[x8\] +** ret +*/ gnu_int8_t test_add_gnu (fixed_int8_t x, fixed_int8_t y) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c index b65826b0889..d423dcfd03a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c @@ -9,5 +9,7 @@ uint64_t f2(uint64_t *ptr, int n) { return res; } -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 {target aarch64_little_endian} } } */ +/* { dg-final { scan-assembler-times {\tldr\tz[0-9]+,} 4 {target aarch64_little_endian} } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d,} 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c index a7ecfe3a0de..93af4c1ce38 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) *dst++ = 1; } -/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target aarch64_little_endian} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c index f3a29fc38a1..fab49edf4bb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c @@ -9,5 +9,6 @@ vset (int *restrict dst, int *restrict src, int count) *dst++ = 1; } -/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target aarch64_little_endian} } } */ /* { dg-final { scan-assembler-not {\tstp\tq} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c index 565e1e3ed39..160667bc680 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) *dst++ = 1; } -/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target aarch64_little_endian} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c index 31057c0cdc7..b71c673cbfa 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) *dst++ = 1; } -/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target aarch64_little_endian} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c index 50e77f9ed57..8d480a8fa1b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int16_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.h), #9\.0[^\n]* ** ... @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.h, #9\.0[^\n]* +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int16_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int16_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.h), #9\.0[^\n]* ** ... @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.h, #9\.0[^\n]* +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int16_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int16_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.h), #9\.0[^\n]* ** ... @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.h, #9\.0[^\n]* +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int16_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int16_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.h), #9\.0[^\n]* ** ... @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.h, #9\.0[^\n]* +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int16_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c index e7b092af5d2..b3c699d314e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int32_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.s), #9\.0[^\n]* ** ... @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.s, #9\.0[^\n]* +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int32_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int32_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.s), #9\.0[^\n]* ** ... @@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.s, #9\.0[^\n]* +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int32_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int32_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.s), #9\.0[^\n]* ** ... @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.s, #9\.0[^\n]* +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int32_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int32_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.s), #9\.0[^\n]* ** ... @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.s, #9\.0[^\n]* +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int32_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c index c3389a8a4c3..7078afc6283 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int64_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.d), #9\.0[^\n]* ** ... @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.d, #9\.0[^\n]* +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int64_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int64_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.d), #9\.0[^\n]* ** ... @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.d, #9\.0[^\n]* +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int64_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int64_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.d), #9\.0[^\n]* ** ... @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.d, #9\.0[^\n]* +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int64_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) ** ... ** ret */ +/* +** callee_8: +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int64_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.d), #9\.0[^\n]* ** ... @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.d, #9\.0[^\n]* +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int64_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c index 28777878d56..fcbac37156c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c @@ -8,9 +8,9 @@ /* ** callee_0: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] +** ldr (z[0-9]+), \[x1\] ** ... -** st1b \1, \2, \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -32,9 +32,9 @@ callee_0 (mfloat8_t *ptr, ...) ** ... ** umov (w[0-9]+), v0.b\[0\] ** ... -** mov (z[0-9]+\.b), \1 +** mov (z[0-9]+)\.b, \1 ** ... -** st1b \2, p[0-7], \[x1\] +** str \2, \[x1\] ** ... ** ret */ @@ -47,9 +47,9 @@ caller_0 (mfloat8_t *ptr, mfloat8_t in) /* ** callee_1: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] +** ldr (z[0-9]+), \[x2\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -72,9 +72,9 @@ callee_1 (mfloat8_t *ptr, ...) ** ... ** umov (w[0-9]+), v0.b\[0\] ** ... -** mov (z[0-9]+\.b), \1 +** mov (z[0-9]+)\.b, \1 ** ... -** st1b \2, p[0-7], \[x2\] +** str \2, \[x2\] ** ... ** ret */ @@ -87,9 +87,9 @@ caller_1 (mfloat8_t *ptr, mfloat8_t in) /* ** callee_7: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] +** ldr (z[0-9]+), \[x7\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -117,9 +117,9 @@ callee_7 (mfloat8_t *ptr, ...) ** ... ** umov (w[0-9]+), v0.b\[0\] ** ... -** mov (z[0-9]+\.b), \1 +** mov (z[0-9]+)\.b, \1 ** ... -** st1b \2, p[0-7], \[x7\] +** str \2, \[x7\] ** ... ** ret */ @@ -136,9 +136,9 @@ caller_7 (mfloat8_t *ptr, mfloat8_t in) ** ... ** ldr (x[0-9]+), \[sp, \1\] ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] +** ldr (z[0-9]+), \[\2\] ** ... -** st1b \3, \4, \[x0\] +** str \3, \[x0\] ** ... ** ret */ @@ -167,9 +167,9 @@ callee_8 (mfloat8_t *ptr, ...) ** ... ** umov (w[0-9]+), v0.b\[0\] ** ... -** mov (z[0-9]+\.b), \1 +** mov (z[0-9]+)\.b, \1 ** ... -** st1b \2, p[0-7], \[(x[0-9]+)\] +** str \2, \[(x[0-9]+)\] ** ... ** str \3, \[sp\] ** ... diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c index 3c644e13428..e65e64f6b72 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int16_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int16_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int16_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int16_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int16_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int16_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int16_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int16_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c index 652d609d3e4..6488a5fa242 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int32_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int32_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int32_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int32_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int32_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int32_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int32_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int32_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c index 72ea6a345cf..4b77b4ff7b0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int64_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int64_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int64_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int64_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int64_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int64_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int64_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int64_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c index 02f4bec9a9c..9528ea3f48b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c @@ -8,9 +8,9 @@ /* ** callee_0: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] +** ldr (z[0-9]+), \[x1\] ** ... -** st1b \1, \2, \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -23,15 +23,15 @@ callee_0 (int8_t *ptr, ...) va_start (va, ptr); vec = va_arg (va, svint8_t); va_end (va); - svst1 (svptrue_b8 (), ptr, vec); +svst1 (svptrue_b8 (), ptr, vec); } /* ** caller_0: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x1\] +** str \1, \[x1\] ** ... ** ret */ @@ -44,9 +44,9 @@ caller_0 (int8_t *ptr) /* ** callee_1: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] +** ldr (z[0-9]+), \[x2\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -66,9 +66,9 @@ callee_1 (int8_t *ptr, ...) /* ** caller_1: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x2\] +** str \1, \[x2\] ** ... ** ret */ @@ -81,9 +81,9 @@ caller_1 (int8_t *ptr) /* ** callee_7: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] +** ldr (z[0-9]+), \[x7\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -108,9 +108,9 @@ callee_7 (int8_t *ptr, ...) /* ** caller_7: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x7\] +** str \1, \[x7\] ** ... ** ret */ @@ -127,9 +127,9 @@ caller_7 (int8_t *ptr) ** ... ** ldr (x[0-9]+), \[sp, \1\] ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] +** ldr (z[0-9]+), \[\2\] ** ... -** st1b \3, \4, \[x0\] +** str \3, \[x0\] ** ... ** ret */ @@ -155,9 +155,9 @@ callee_8 (int8_t *ptr, ...) /* ** caller_8: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[(x[0-9]+)\] +** str \1, \[(x[0-9]+)\] ** ... ** str \2, \[sp\] ** ... diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c index b60d448c0dc..74ef4daed4f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int16_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int16_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int16_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int16_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int16_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int16_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int16_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int16_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c index 5f01464934d..4f9ff785dd9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int32_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int32_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int32_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int32_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int32_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int32_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int32_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int32_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c index 986739fdc36..27e437bbe6c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int64_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int64_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int64_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int64_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int64_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int64_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int64_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int64_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c index 533cba67713..d43a6daa347 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c @@ -8,9 +8,9 @@ /* ** callee_0: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] +** ldr (z[0-9]+), \[x1\] ** ... -** st1b \1, \2, \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -29,9 +29,9 @@ callee_0 (int8_t *ptr, ...) /* ** caller_0: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x1\] +** str \1, \[x1\] ** ... ** ret */ @@ -44,9 +44,9 @@ caller_0 (int8_t *ptr) /* ** callee_1: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] +** ldr (z[0-9]+), \[x2\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -66,9 +66,9 @@ callee_1 (int8_t *ptr, ...) /* ** caller_1: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x2\] +** str \1, \[x2\] ** ... ** ret */ @@ -81,9 +81,9 @@ caller_1 (int8_t *ptr) /* ** callee_7: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] +** ldr (z[0-9]+), \[x7\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -108,9 +108,9 @@ callee_7 (int8_t *ptr, ...) /* ** caller_7: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x7\] +** str \1, \[x7\] ** ... ** ret */ @@ -127,9 +127,9 @@ caller_7 (int8_t *ptr) ** ... ** ldr (x[0-9]+), \[sp, \1\] ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] +** ldr (z[0-9]+), \[\2\] ** ... -** st1b \3, \4, \[x0\] +** str \3, \[x0\] ** ... ** ret */ @@ -155,9 +155,9 @@ callee_8 (int8_t *ptr, ...) /* ** caller_8: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[(x[0-9]+)\] +** str \1, \[(x[0-9]+)\] ** ... ** str \2, \[sp\] ** ... diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c index 985cd0c6d77..f07900b2f0a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c @@ -19,5 +19,7 @@ foo (void) /* We should operate on aligned vectors. */ /* { dg-final { scan-assembler {\t(adrp|adr)\tx[0-9]+, (x|\.LANCHOR0)\n} } } */ /* We should unroll the loop three times. */ -/* { dg-final { scan-assembler-times "\tst1w\t" 3 } } */ +/* { dg-final { scan-assembler-times "\tst1w\t" 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times "\tst1w\t" 2 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times "\tstr\t" 1 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.s, vl7\n.*\teor\tp[0-9]+\.b, (p[0-9]+)/z, (\1\.b, \2\.b|\2\.b, \1\.b)\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c b/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c new file mode 100644 index 00000000000..c3bfa98e5e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target aarch64_little_endian } */ + +#include <arm_sve.h> + +#define TEST(TYPE, TY, B) \ + sv##TYPE ld_##TY (TYPE *x) \ + { \ + return svld1_##TY(svptrue_b##B (), x); \ + } \ + void st_##TY (TYPE *x, sv##TYPE data) \ + { \ + svst1_##TY(svptrue_b##B (), x, data); \ + } + +TEST(bfloat16_t, bf16, 16) +TEST(float16_t, f16, 16) +TEST(float32_t, f32, 32) +TEST(float64_t, f64, 64) +TEST(uint8_t, u8, 8) +TEST(uint16_t, u16, 16) +TEST(uint32_t, u32, 32) +TEST(uint64_t, u64, 64) +TEST(int8_t, s8, 8) +TEST(int16_t, s16, 16) +TEST(int32_t, s32, 32) +TEST(int64_t, s64, 64) + +/* { dg-final { scan-assembler-times {\tldr\tz0, \[x0\]} 12 } } */ +/* { dg-final { scan-assembler-times {\tstr\tz0, \[x0\]} 12 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c index d9bb97e12cd..b9c3d3e5241 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c @@ -40,12 +40,13 @@ TEST_LOOP (double, 3.0) /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 { target aarch64_big_endian } } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-not {\twhile} } } */ /* { dg-final { scan-assembler-not {\tb} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c index d27eead17e3..44810364380 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c @@ -16,12 +16,13 @@ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 { target aarch64_big_endian } } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-not {\twhile} } } */ /* { dg-final { scan-assembler-not {\tb} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c index 313a72da067..26614b25217 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c @@ -16,12 +16,13 @@ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 { target aarch64_big_endian } } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-not {\twhile} } } */ /* { dg-final { scan-assembler-not {\tb} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c index 4f46654a5ee..475482584cf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c @@ -16,12 +16,13 @@ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 { target aarch64_big_endian } } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-not {\twhile} } } */ /* { dg-final { scan-assembler-not {\tb} } } */ -- 2.34.1
smime.p7s
Description: S/MIME cryptographic signature