> > +    }
> > +  cgraph_simd_clone *sc = node->simdclone;
> > +
> > +  for (unsigned i = 0; i < sc->nargs; ++i)
> > +    {
> > +      bool is_mask = false;
> > +      tree type;
> > +      switch (sc->args[i].arg_type)
> > +        {
> > +        case SIMD_CLONE_ARG_TYPE_MASK:
> > +          is_mask = true;
> > +          gcc_fallthrough ();
> > +        case SIMD_CLONE_ARG_TYPE_VECTOR:
> > +        case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
> > +        case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
> > +          type = sc->args[i].vector_type;
> > +          gcc_assert (VECTOR_TYPE_P (type));
> > +          if (node->simdclone->vecsize_mangle == 's')
> > +            type = simd_clone_adjust_sve_vector_type (type, is_mask,
> > +                                                      sc->simdlen);
> > +          else if (is_mask)
> > +            type = truth_type_for (type);
>
> Sorry, I have a horrible feeling I knew this once and have forgotten,
> but: why do we need to do this for non-SVE, when we didn't before?
>
I don't think we do either.  For Adv. SIMD the truth type is the same as
the vector type anyway, so this is a no-op.  Removed.

> I should have noticed this last time, sorry, but we don't seem to have
> any coverage for the linear cases above.  Maybe that comes in a later
> patch though.
>

No, though I did try to make some examples of linear cases.  In C the
vectorizer just ignores the pragma.  In C++ with a linear reference we
fail to vectorize because we again hit the mismatch between safelen
being an int and the VF being a poly_int, and so we bail out.  I did
manage to create a testcase that generates an ICE, but that is due to an
existing bug in the vectorizer with how it registers masks.  Since that
is an existing bug, I'm hoping it's not a blocker for this series.
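
For reference, the C++ linear-reference cases I was trying looked
roughly like the following (an illustrative sketch only; `fn' and the
loop are made up for the example and are not part of this patch, and it
assumes -O3 -fopenmp-simd so the pragmas take effect):

  /* `linear(ref(b))' is the C++-only reference-linear modifier;
     vectorizing the loop below currently bails out as described.  */
  #pragma omp declare simd linear(ref(b))
  int fn (int a, int &b);

  void test (int *a, int *b, int n)
  {
    #pragma omp simd
    for (int i = 0; i < n; ++i)
      a[i] = fn (a[i], b[i]);
  }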

gcc/ChangeLog:

	PR target/96342
	* config/aarch64/aarch64-protos.h (add_sve_type_attribute): Declare.
	* config/aarch64/aarch64-sve-builtins.cc (add_sve_type_attribute):
	Make visibility global and support use for non-ACLE types.
	* config/aarch64/aarch64.cc
	(aarch64_simd_clone_compute_vecsize_and_simdlen): Create VLA simd
	clone when no simdlen is provided, according to ABI rules.
	(simd_clone_adjust_sve_vector_type): New helper function.
	(aarch64_simd_clone_adjust): Add '+sve' attribute to SVE simd clones
	and modify types to use SVE types.
	* omp-simd-clone.cc (simd_clone_mangle): Print 'x' for VLA simdlen.
	(simd_clone_adjust): Adapt safelen check to be compatible with VLA
	simdlen.

gcc/testsuite/ChangeLog:

	PR target/96342
	* gcc.target/aarch64/declare-simd-2.c: Add SVE clone scan.
	* gcc.target/aarch64/vect-simd-clone-1.c: New test.
	* g++.target/aarch64/vect-simd-clone-1.c: New test.

Co-authored-by: Victor Do Nascimento <victor.donascime...@arm.com>
Co-authored-by: Tamar Christina <tamar.christ...@arm.com>

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu (-m32 and -m64) with
no issues.

Ok for master?

Thanks,
Tamar

-- inline copy of patch --

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index bd17486e9128a21bd205ef1fb3ec3e323408ec59..7ab1316cf56850678d93b6fdb8d19eea18ad78f1 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1151,6 +1151,8 @@ namespace aarch64_sve {
 #ifdef GCC_TARGET_H
   bool verify_type_context (location_t, type_context_kind, const_tree, bool);
 #endif
+  void add_sve_type_attribute (tree, unsigned int, unsigned int,
+                               const char *, const char *);
 }

 extern void aarch64_split_combinev16qi (rtx operands[3]);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 5acc56f99c65498cbf5593a9ee21540fa55098c2..e93c3a78e6d6c909f5de32ba8672503fc42b8d1c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1032,15 +1032,18 @@ static GTY(()) hash_map<tree, registered_function *> *overload_names[2];

 /* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE vectors
    and NUM_PR SVE predicates.  MANGLED_NAME, if nonnull, is the ABI-defined
-   mangling of the type.  ACLE_NAME is the <arm_sve.h> name of the type.  */
-static void
+   mangling of the type.  ACLE_NAME is the <arm_sve.h> name of the type,
+   or null if <arm_sve.h> does not provide the type.  */
+void
 add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr,
                         const char *mangled_name, const char *acle_name)
 {
   tree mangled_name_tree
     = (mangled_name ? get_identifier (mangled_name) : NULL_TREE);
+  tree acle_name_tree
+    = (acle_name ? get_identifier (acle_name) : NULL_TREE);

-  tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE);
+  tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE);
   value = tree_cons (NULL_TREE, mangled_name_tree, value);
   value = tree_cons (NULL_TREE, size_int (num_pr), value);
   value = tree_cons (NULL_TREE, size_int (num_zr), value);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 77a2a6bfa3a3a6cd678ceb820d310f44cacfe581..de4c0a0783912b54ac35d7c818c24574b27a4ca0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29323,7 +29323,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
                                                 int num, bool explicit_p)
 {
   tree t, ret_type;
-  unsigned int nds_elt_bits;
+  unsigned int nds_elt_bits, wds_elt_bits;
   unsigned HOST_WIDE_INT const_simdlen;

   if (!TARGET_SIMD)
@@ -29368,10 +29368,14 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
   if (TREE_CODE (ret_type) != VOID_TYPE)
     {
       nds_elt_bits = lane_size (SIMD_CLONE_ARG_TYPE_VECTOR, ret_type);
+      wds_elt_bits = nds_elt_bits;
       vec_elts.safe_push (std::make_pair (ret_type, nds_elt_bits));
     }
   else
-    nds_elt_bits = POINTER_SIZE;
+    {
+      nds_elt_bits = POINTER_SIZE;
+      wds_elt_bits = 0;
+    }

   int i;
   tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
@@ -29379,44 +29383,65 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
   for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
        t && t != void_list_node; t = TREE_CHAIN (t), i++)
     {
-      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
+      tree type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
       if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
-          && !supported_simd_type (arg_type))
+          && !supported_simd_type (type))
         {
           if (!explicit_p)
             ;
-          else if (COMPLEX_FLOAT_TYPE_P (ret_type))
+          else if (COMPLEX_FLOAT_TYPE_P (type))
             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                         "GCC does not currently support argument type %qT "
-                        "for simd", arg_type);
+                        "for simd", type);
           else
             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                         "unsupported argument type %qT for simd",
-                        arg_type);
+                        type);
           return 0;
         }
-      unsigned lane_bits = lane_size (clonei->args[i].arg_type, arg_type);
+      unsigned lane_bits = lane_size (clonei->args[i].arg_type, type);
       if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
-        vec_elts.safe_push (std::make_pair (arg_type, lane_bits));
+        vec_elts.safe_push (std::make_pair (type, lane_bits));
       if (nds_elt_bits > lane_bits)
         nds_elt_bits = lane_bits;
+      if (wds_elt_bits < lane_bits)
+        wds_elt_bits = lane_bits;
     }

-  clonei->vecsize_mangle = 'n';
+  /* If we could not determine the WDS type from available parameters/return,
+     then fallback to using uintptr_t.  */
+  if (wds_elt_bits == 0)
+    wds_elt_bits = POINTER_SIZE;
+
   clonei->mask_mode = VOIDmode;
   poly_uint64 simdlen;
-  auto_vec<poly_uint64> simdlens (2);
+  typedef struct
+    {
+      poly_uint64 len;
+      char mangle;
+    } aarch64_clone_info;
+  auto_vec<aarch64_clone_info, 3> clones;
+
   /* Keep track of the possible simdlens the clones of this function
      can have, and check them later to see if we support them.  */
   if (known_eq (clonei->simdlen, 0U))
     {
       simdlen = exact_div (poly_uint64 (64), nds_elt_bits);
       if (maybe_ne (simdlen, 1U))
-        simdlens.safe_push (simdlen);
-      simdlens.safe_push (simdlen * 2);
+        clones.safe_push ({simdlen, 'n'});
+      clones.safe_push ({simdlen * 2, 'n'});
+
+      /* Only create an SVE simd clone if we aren't dealing with an
+         unprototyped function.
+         We have also disabled support for creating SVE simdclones for
+         functions with function bodies and any simdclones when
+         -msve-vector-bits is used.  TODO: add support for these.  */
+      if (prototype_p (TREE_TYPE (node->decl))
+          && !node->definition
+          && !aarch64_sve_vg.is_constant ())
+        clones.safe_push ({exact_div (BITS_PER_SVE_VECTOR, wds_elt_bits),
+                           's'});
     }
   else
-    simdlens.safe_push (clonei->simdlen);
+    clones.safe_push ({clonei->simdlen, 'n'});

   clonei->vecsize_int = 0;
   clonei->vecsize_float = 0;
@@ -29430,11 +29455,12 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
      simdclone would cause a vector type to be larger than 128-bits, and reject
      such a clone.  */
   unsigned j = 0;
-  while (j < simdlens.length ())
+  while (j < clones.length ())
     {
       bool remove_simdlen = false;
       for (auto elt : vec_elts)
-        if (known_gt (simdlens[j] * elt.second, 128U))
+        if (clones[j].mangle == 'n'
+            && known_gt (clones[j].len * elt.second, 128U))
          {
            /* Don't issue a warning for every simdclone when there is no
               specific simdlen clause.  */
@@ -29442,18 +29468,17 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                        "GCC does not currently support simdlen %wd for "
                        "type %qT",
-                       constant_lower_bound (simdlens[j]), elt.first);
+                       constant_lower_bound (clones[j].len), elt.first);
            remove_simdlen = true;
            break;
          }
       if (remove_simdlen)
-        simdlens.ordered_remove (j);
+        clones.ordered_remove (j);
       else
        j++;
     }
-
-  int count = simdlens.length ();
+  int count = clones.length ();

   if (count == 0)
     {
       if (explicit_p && known_eq (clonei->simdlen, 0U))
@@ -29470,21 +29495,112 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
     }

   gcc_assert (num < count);
-  clonei->simdlen = simdlens[num];
+  clonei->simdlen = clones[num].len;
+  clonei->vecsize_mangle = clones[num].mangle;
+  /* SVE simdclones always have a Mask, so set inbranch to 1.  */
+  if (clonei->vecsize_mangle == 's')
+    clonei->inbranch = 1;

   return count;
 }

-/* Implement TARGET_SIMD_CLONE_ADJUST.  */
+/* Helper function to adjust an SVE vector type of an SVE simd clone.  Returns
+   an SVE vector type based on the element type of the vector TYPE, with
+   SIMDLEN number of elements.  If IS_MASK, returns an SVE mask type
+   appropriate for use with the SVE type it would otherwise return.  */
+
+static tree
+simd_clone_adjust_sve_vector_type (tree type, bool is_mask,
+                                   poly_uint64 simdlen)
+{
+  unsigned int num_zr = 0;
+  unsigned int num_pr = 0;
+  machine_mode vector_mode;
+  type = TREE_TYPE (type);
+  scalar_mode scalar_m = SCALAR_TYPE_MODE (type);
+  vector_mode = aarch64_sve_data_mode (scalar_m, simdlen).require ();
+  type = build_vector_type_for_mode (type, vector_mode);
+  if (is_mask)
+    {
+      type = truth_type_for (type);
+      num_pr = 1;
+    }
+  else
+    num_zr = 1;
+
+  /* We create new types here with the SVE type attribute instead of using ACLE
+     types as we need to support unpacked vectors which aren't available as
+     ACLE SVE types.  */
+
+  /* ??? This creates anonymous "SVE type" attributes for all types,
+     even those that correspond to <arm_sve.h> types.  This affects type
+     compatibility in C/C++, but not in gimple.  (Gimple type equivalence
+     is instead decided by TARGET_COMPATIBLE_VECTOR_TYPES_P.)
+     Thus a C/C++ definition of the implementation function will have a
+     different function type from the declaration that this code creates.
+     However, it doesn't seem worth trying to fix that until we have a
+     way of handling implementations that operate on unpacked types.  */
+  type = build_distinct_type_copy (type);
+  aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
+  return type;
+}
+
+/* Implement TARGET_SIMD_CLONE_ADJUST.  */

 static void
 aarch64_simd_clone_adjust (struct cgraph_node *node)
 {
-  /* Add aarch64_vector_pcs target attribute to SIMD clones so they
-     use the correct ABI.  */
   tree t = TREE_TYPE (node->decl);
-  TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
-                                        TYPE_ATTRIBUTES (t));
+
+  if (node->simdclone->vecsize_mangle == 's')
+    {
+      /* This is additive and has no effect if SVE, or a superset thereof, is
+         already enabled.  */
+      tree target = build_string (strlen ("+sve") + 1, "+sve");
+      if (!aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 0))
+        gcc_unreachable ();
+      push_function_decl (node->decl);
+    }
+  else
+    {
+      /* Add aarch64_vector_pcs target attribute to SIMD clones so they
+         use the correct ABI.  */
+      TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
+                                            TYPE_ATTRIBUTES (t));
+    }
+
+  cgraph_simd_clone *sc = node->simdclone;
+
+  for (unsigned i = 0; i < sc->nargs; ++i)
+    {
+      bool is_mask = false;
+      tree type;
+      switch (sc->args[i].arg_type)
+        {
+        case SIMD_CLONE_ARG_TYPE_MASK:
+          is_mask = true;
+          gcc_fallthrough ();
+        case SIMD_CLONE_ARG_TYPE_VECTOR:
+        case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
+        case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
+          type = sc->args[i].vector_type;
+          gcc_assert (VECTOR_TYPE_P (type));
+          if (node->simdclone->vecsize_mangle == 's')
+            type = simd_clone_adjust_sve_vector_type (type, is_mask,
+                                                      sc->simdlen);
+          sc->args[i].vector_type = type;
+          break;
+        default:
+          continue;
+        }
+    }
+
+  if (node->simdclone->vecsize_mangle == 's')
+    {
+      tree ret_type = TREE_TYPE (t);
+      if (VECTOR_TYPE_P (ret_type))
+        TREE_TYPE (t)
+          = simd_clone_adjust_sve_vector_type (ret_type, false,
+                                               node->simdclone->simdlen);
+      pop_function_decl ();
+    }
 }

 /* Implement TARGET_SIMD_CLONE_USABLE.  */
@@ -29498,6 +29614,11 @@ aarch64_simd_clone_usable (struct cgraph_node *node, machine_mode vector_mode)
       if (!TARGET_SIMD
          || aarch64_sve_mode_p (vector_mode))
        return -1;
       return 0;
+    case 's':
+      if (!TARGET_SVE
+          || !aarch64_sve_mode_p (vector_mode))
+        return -1;
+      return 0;
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc
index 864586207ee89269b5a2cf136487440212d59695..4be25539057251a318409e576e4bc43fc5fd4c40 100644
--- a/gcc/omp-simd-clone.cc
+++ b/gcc/omp-simd-clone.cc
@@ -541,9 +541,12 @@ simd_clone_mangle (struct cgraph_node *node,
   pp_string (&pp, "_ZGV");
   pp_character (&pp, vecsize_mangle);
   pp_character (&pp, mask);
-  /* For now, simdlen is always constant, while variable simdlen pp 'n'.  */
-  unsigned int len = simdlen.to_constant ();
-  pp_decimal_int (&pp, (len));
+
+  unsigned HOST_WIDE_INT len;
+  if (simdlen.is_constant (&len))
+    pp_decimal_int (&pp, (int) (len));
+  else
+    pp_character (&pp, 'x');

   for (n = 0; n < clone_info->nargs; ++n)
     {
@@ -1533,8 +1536,8 @@ simd_clone_adjust (struct cgraph_node *node)
         below).  */
       loop = alloc_loop ();
       cfun->has_force_vectorize_loops = true;
-      /* For now, simlen is always constant.  */
-      loop->safelen = node->simdclone->simdlen.to_constant ();
+      /* We can assert that safelen is the 'minimum' simdlen.  */
+      loop->safelen = constant_lower_bound (node->simdclone->simdlen);
       loop->force_vectorize = true;
       loop->header = body_bb;
     }
diff --git a/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
new file mode 100644
index 0000000000000000000000000000000000000000..90febeca16e5126ed86f2f472b66c3bc3533c773
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
@@ -0,0 +1,88 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+/* Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
+   function calls from scalar versions in accordance with the Vector Function
+   Application Binary Interface Specification for AArch64 (AAVPCS).
+
+   We check for correctness in:
+   - Vector function name mangling, with the grammar:
+
+       vector name := prefix "_" name
+       prefix := "_ZGV" isa mask <len> <parameters>
+
+     Whereby:
+     - <isa> := "s" for SVE
+     - <mask> := "M" for Mask
+     - <len> := "x" for VLA SVE
+
+     resulting in:
+       <prefix> := "_ZGVsMx" <parameters>
+
+     with each vector parameter contributing a "v" to the prefix.
+
+   - Parameter and return value mapping:
+     - Unless marked with uniform or linear OpenMP clauses, parameters and
+       return values are expected to map to vectors.
+     - Where the lane-size of a parameter is less than the widest data size
+       for a given function, the resulting vector should be unpacked and
+       populated via extending loads.
+
+   - Finally, we also make sure we can correctly generate calls to the same
+     function, differing only in the target architecture (i.e. SVE vs SIMD),
+     ensuring that each call points to the correctly-mangled vector function
+     and employs the correct ABI.  For example, for `fn' we may expect:
+
+       for #pragma GCC target("+sve"):  _ZGVsMxvv_fn
+       for #pragma GCC target("+simd"): _ZGVnN4vv_fn  */
+
+#pragma GCC target ("+sve")
+/* { dg-final { scan-assembler {\s+_ZGVsMxv__Z3fn0i\n} } } */
+extern int __attribute__ ((simd, const)) fn0 (int);
+void test_fn0 (int *a, int *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] += fn0 (b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn1si\n} } } */
+extern int __attribute__ ((simd, const)) fn1 (short, int);
+void test_fn1 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn1 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn2si\n} } } */
+extern short __attribute__ ((simd, const)) fn2 (short, int);
+void test_fn2 (short *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn2 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn3ic\n} } } */
+extern char __attribute__ ((simd, const)) fn3 (int, char);
+void test_fn3 (int *a, int *b, char *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn4 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
+
+#pragma GCC reset_options
+#pragma GCC target ("+simd")
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn5 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
index e2e80f0c663dcc182b8cc48b0453558e794f4085..2f4d3a866e55018b8ac8b483b8c33db862a57071 100644
--- a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
@@ -43,6 +43,7 @@ float f04 (double a)
 }
 /* { dg-final { scan-assembler {_ZGVnN2v_f04:} } } */
 /* { dg-final { scan-assembler {_ZGVnM2v_f04:} } } */
+/* { dg-final { scan-assembler-not {_ZGVs[0-9a-z]*_f04:} } } */

 #pragma omp declare simd uniform(a) linear (b)
 void f05 (short a, short *b, short c)
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0d8f497644ca119529a5778b81ae8a78948306e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c
@@ -0,0 +1,89 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99" } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+/* Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
+   function calls from scalar versions in accordance with the Vector Function
+   Application Binary Interface Specification for AArch64 (AAVPCS).
+
+   We check for correctness in:
+   - Vector function name mangling, with the grammar:
+
+       vector name := prefix "_" name
+       prefix := "_ZGV" isa mask <len> <parameters>
+
+     Whereby:
+     - <isa> := "s" for SVE
+     - <mask> := "M" for Mask
+     - <len> := "x" for VLA SVE
+
+     resulting in:
+       <prefix> := "_ZGVsMx" <parameters>
+
+     with each vector parameter contributing a "v" to the prefix.
+
+   - Parameter and return value mapping:
+     - Unless marked with uniform or linear OpenMP clauses, parameters and
+       return values are expected to map to vectors.
+     - Where the lane-size of a parameter is less than the widest data size
+       for a given function, the resulting vector should be unpacked and
+       populated via extending loads.
+
+   - Finally, we also make sure we can correctly generate calls to the same
+     function, differing only in the target architecture (i.e. SVE vs SIMD),
+     ensuring that each call points to the correctly-mangled vector function
+     and employs the correct ABI.  For example, for `fn' we may expect:
+
+       for #pragma GCC target("+sve"):  _ZGVsMxvv_fn
+       for #pragma GCC target("+simd"): _ZGVnN4vv_fn  */
+
+#pragma GCC target ("+sve")
+/* { dg-final { scan-assembler {\s+_ZGVsMxv_fn0\n} } } */
+extern int __attribute__ ((simd, const)) fn0 (int);
+void test_fn0 (int *a, int *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] += fn0 (b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn1\n} } } */
+extern int __attribute__ ((simd, const)) fn1 (short, int);
+void test_fn1 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn1 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn2\n} } } */
+extern short __attribute__ ((simd, const)) fn2 (short, int);
+void test_fn2 (short *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn2 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */
+extern char __attribute__ ((simd, const)) fn3 (int, char);
+void test_fn3 (int *a, int *b, char *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn4\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn4 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
+
+#pragma GCC reset_options
+#pragma GCC target ("+simd")
+/* { dg-final { scan-assembler {\s+_ZGVnN4vv_fn4\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn5 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
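
To make the type rewriting concrete: conceptually, the masked VLA clone
the patch creates for the first test function above has a signature
along the following lines.  This is a sketch for illustration only; it
uses the <arm_sve.h> typedefs for readability, whereas the patch builds
distinct copies of the equivalent types carrying the "SVE type"
attribute, precisely so it can also represent unpacked vectors that
have no ACLE typedef:

  #include <arm_sve.h>

  /* Scalar original:  int fn0 (int);
     Masked VLA SVE clone: simdlen is VL/32 elements, hence the 'x'
     length marker in the mangled name, with the governing predicate
     passed as a trailing svbool_t per the vector function ABI.  */
  svint32_t _ZGVsMxv_fn0 (svint32_t x, svbool_t mask);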