ping
> -----Original Message----- > From: Tamar Christina <tamar.christ...@arm.com> > Sent: Wednesday, December 4, 2024 12:17 PM > To: gcc-patches@gcc.gnu.org > Cc: nd <n...@arm.com>; Richard Earnshaw <richard.earns...@arm.com>; > ktkac...@gcc.gnu.org; Richard Sandiford <richard.sandif...@arm.com> > Subject: [PATCH 2/7]AArch64: Add SVE support for simd clones [PR96342] > > Hi All, > > This patch finalizes adding support for the generation of SVE simd clones when > no simdlen is provided, following the ABI rules where the widest data type > determines the minimum number of elements in a length-agnostic vector. > > gcc/ChangeLog: > > PR target/96342 > * config/aarch64/aarch64-protos.h (add_sve_type_attribute): Declare. > * config/aarch64/aarch64-sve-builtins.cc (add_sve_type_attribute): Make > visibility global and support use for non_acle types. > * config/aarch64/aarch64.cc > (aarch64_simd_clone_compute_vecsize_and_simdlen): Create VLA simd > clone > when no simdlen is provided, according to ABI rules. > (simd_clone_adjust_sve_vector_type): New helper function. > (aarch64_simd_clone_adjust): Add '+sve' attribute to SVE simd clones > and modify types to use SVE types. > * omp-simd-clone.cc (simd_clone_mangle): Print 'x' for VLA simdlen. > (simd_clone_adjust): Adapt safelen check to be compatible with VLA > simdlen. > > gcc/testsuite/ChangeLog: > > PR target/96342 > * gcc.target/aarch64/declare-simd-2.c: Add SVE clone scan. > * gcc.target/aarch64/vect-simd-clone-1.c: New test. > * g++.target/aarch64/vect-simd-clone-1.C: New test. > > > Co-authored-by: Victor Do Nascimento <victor.donascime...@arm.com> > Co-authored-by: Tamar Christina <tamar.christ...@arm.com> > > Bootstrapped and regtested on aarch64-none-linux-gnu, > arm-none-linux-gnueabihf, x86_64-pc-linux-gnu > -m32, -m64 and no issues. > > Ok for master? 
> > Thanks, > Tamar > > --- > diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64- > protos.h > index > c6ce62190bce43fae7b0c9d64202a7c042df6ef4..e7724e0518dd97a120edbc5f0 > 2b20298a57c653f 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -1138,6 +1138,8 @@ namespace aarch64_sve { > #ifdef GCC_TARGET_H > bool verify_type_context (location_t, type_context_kind, const_tree, bool); > #endif > + void add_sve_type_attribute (tree, unsigned int, unsigned int, > + const char *, const char *); > } > > extern void aarch64_split_combinev16qi (rtx operands[3]); > diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc > b/gcc/config/aarch64/aarch64-sve-builtins.cc > index > 0fec1cd439e729dca495aac4dea054a25ede20a7..e6c2bdeb00681848a838009c > 833cfe3271a94049 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc > @@ -998,14 +998,16 @@ static GTY(()) hash_map<tree, registered_function *> > *overload_names[2]; > /* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE > vectors > and NUM_PR SVE predicates. MANGLED_NAME, if nonnull, is the ABI-defined > mangling of the type. ACLE_NAME is the <arm_sve.h> name of the type. */ > -static void > +void > add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr, > const char *mangled_name, const char *acle_name) > { > tree mangled_name_tree > = (mangled_name ? get_identifier (mangled_name) : NULL_TREE); > + tree acle_name_tree > + = (acle_name ? 
get_identifier (acle_name) : NULL_TREE); > > - tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE); > + tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE); > value = tree_cons (NULL_TREE, mangled_name_tree, value); > value = tree_cons (NULL_TREE, size_int (num_pr), value); > value = tree_cons (NULL_TREE, size_int (num_zr), value); > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index > 4108c09715a5540db87ec4ba74a10804af78054a..af6fede102c2be6673c24f80 > 20d000ea56322997 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -29284,7 +29284,7 @@ > aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, > int num, bool explicit_p) > { > tree t, ret_type; > - unsigned int nds_elt_bits; > + unsigned int nds_elt_bits, wds_elt_bits; > unsigned HOST_WIDE_INT const_simdlen; > > if (!TARGET_SIMD) > @@ -29329,10 +29329,14 @@ > aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, > if (TREE_CODE (ret_type) != VOID_TYPE) > { > nds_elt_bits = lane_size (SIMD_CLONE_ARG_TYPE_VECTOR, ret_type); > + wds_elt_bits = nds_elt_bits; > vec_elts.safe_push (std::make_pair (ret_type, nds_elt_bits)); > } > else > - nds_elt_bits = POINTER_SIZE; > + { > + nds_elt_bits = POINTER_SIZE; > + wds_elt_bits = 0; > + } > > int i; > tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl)); > @@ -29340,44 +29344,65 @@ > aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, > for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = > 0; > t && t != void_list_node; t = TREE_CHAIN (t), i++) > { > - tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t); > + tree type = decl_arg_p ? 
TREE_TYPE (t) : TREE_VALUE (t); > if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM > - && !supported_simd_type (arg_type)) > + && !supported_simd_type (type)) > { > if (!explicit_p) > ; > - else if (COMPLEX_FLOAT_TYPE_P (ret_type)) > + else if (COMPLEX_FLOAT_TYPE_P (type)) > warning_at (DECL_SOURCE_LOCATION (node->decl), 0, > "GCC does not currently support argument type %qT " > - "for simd", arg_type); > + "for simd", type); > else > warning_at (DECL_SOURCE_LOCATION (node->decl), 0, > "unsupported argument type %qT for simd", > - arg_type); > + type); > return 0; > } > - unsigned lane_bits = lane_size (clonei->args[i].arg_type, arg_type); > + unsigned lane_bits = lane_size (clonei->args[i].arg_type, type); > if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) > - vec_elts.safe_push (std::make_pair (arg_type, lane_bits)); > + vec_elts.safe_push (std::make_pair (type, lane_bits)); > if (nds_elt_bits > lane_bits) > nds_elt_bits = lane_bits; > + if (wds_elt_bits < lane_bits) > + wds_elt_bits = lane_bits; > } > > - clonei->vecsize_mangle = 'n'; > + /* If we could not determine the WDS type from available parameters/return, > + then fallback to using uintptr_t. */ > + if (wds_elt_bits == 0) > + wds_elt_bits = POINTER_SIZE; > + > clonei->mask_mode = VOIDmode; > poly_uint64 simdlen; > - auto_vec<poly_uint64> simdlens (2); > + typedef struct > + { > + poly_uint64 len; > + char mangle; > + } aarch64_clone_info; > + auto_vec<aarch64_clone_info> clones (3); > + > /* Keep track of the possible simdlens the clones of this function can > have, > and check them later to see if we support them. 
*/ > if (known_eq (clonei->simdlen, 0U)) > { > simdlen = exact_div (poly_uint64 (64), nds_elt_bits); > if (maybe_ne (simdlen, 1U)) > - simdlens.safe_push (simdlen); > - simdlens.safe_push (simdlen * 2); > + clones.safe_push ({simdlen, 'n'}); > + clones.safe_push ({simdlen * 2, 'n'}); > + /* Only create an SVE simd clone if we aren't dealing with an > unprototyped > + function. > + We have also disabled support for creating SVE simdclones for functions > + with function bodies and any simdclones when -msve-vector-bits is used. > + TODO: add support for these. */ > + if (prototype_p (TREE_TYPE (node->decl)) > + && !node->definition > + && !aarch64_sve_vg.is_constant ()) > + clones.safe_push ({exact_div (BITS_PER_SVE_VECTOR, wds_elt_bits), 's'}); > } > else > - simdlens.safe_push (clonei->simdlen); > + clones.safe_push ({clonei->simdlen, 'n'}); > > clonei->vecsize_int = 0; > clonei->vecsize_float = 0; > @@ -29391,11 +29416,12 @@ > aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, > simdclone would cause a vector type to be larger than 128-bits, and > reject > such a clone. */ > unsigned j = 0; > - while (j < simdlens.length ()) > + while (j < clones.length ()) > { > bool remove_simdlen = false; > for (auto elt : vec_elts) > - if (known_gt (simdlens[j] * elt.second, 128U)) > + if (clones[j].mangle == 'n' > + && known_gt (clones[j].len * elt.second, 128U)) > { > /* Don't issue a warning for every simdclone when there is no > specific simdlen clause. 
*/ > @@ -29403,18 +29429,17 @@ > aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, > warning_at (DECL_SOURCE_LOCATION (node->decl), 0, > "GCC does not currently support simdlen %wd for " > "type %qT", > - constant_lower_bound (simdlens[j]), elt.first); > + constant_lower_bound (clones[j].len), elt.first); > remove_simdlen = true; > break; > } > if (remove_simdlen) > - simdlens.ordered_remove (j); > + clones.ordered_remove (j); > else > j++; > } > > - > - int count = simdlens.length (); > + int count = clones.length (); > if (count == 0) > { > if (explicit_p && known_eq (clonei->simdlen, 0U)) > @@ -29431,21 +29456,103 @@ > aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, > } > > gcc_assert (num < count); > - clonei->simdlen = simdlens[num]; > + clonei->simdlen = clones[num].len; > + clonei->vecsize_mangle = clones[num].mangle; > + /* SVE simdclones always have a Mask, so set inbranch to 1. */ > + if (clonei->vecsize_mangle == 's') > + clonei->inbranch = 1; > return count; > } > > -/* Implement TARGET_SIMD_CLONE_ADJUST. */ > +/* Helper function to adjust an SVE vector type of an SVE simd clone. > Returns > + an SVE vector type based on the element type of the vector TYPE, with > SIMDLEN > + number of elements. If IS_MASK, returns an SVE mask type appropriate for > use > + with the SVE type it would otherwise return. 
*/ > > +static tree > +simd_clone_adjust_sve_vector_type (tree type, bool is_mask, poly_uint64 > simdlen) > +{ > + unsigned int num_zr = 0; > + unsigned int num_pr = 0; > + machine_mode vector_mode; > + type = TREE_TYPE (type); > + scalar_mode scalar_m = SCALAR_TYPE_MODE (type); > + vector_mode = aarch64_sve_data_mode (scalar_m, simdlen).require (); > + type = build_vector_type_for_mode (type, vector_mode); > + if (is_mask) > + { > + type = truth_type_for (type); > + num_pr = 1; > + } > + else > + num_zr = 1; > + > + /* We create new types here with the SVE type attribute instead of using > ACLE > + types as we need to support unpacked vectors which aren't available as > + ACLE SVE types. */ > + type = build_distinct_type_copy (type); > + aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL); > + return type; > +} > + > +/* Implement TARGET_SIMD_CLONE_ADJUST. */ > static void > aarch64_simd_clone_adjust (struct cgraph_node *node) > { > - /* Add aarch64_vector_pcs target attribute to SIMD clones so they > - use the correct ABI. */ > - > tree t = TREE_TYPE (node->decl); > - TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default", > - TYPE_ATTRIBUTES (t)); > + > + if (node->simdclone->vecsize_mangle == 's') > + { > + /* This is additive and has no effect if SVE, or a superset thereof, is > + already enabled. */ > + tree target = build_string (strlen ("+sve") + 1, "+sve"); > + if (!aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, > 0)) > + gcc_unreachable (); > + push_function_decl (node->decl); > + } > + else > + { > + /* Add aarch64_vector_pcs target attribute to SIMD clones so they > + use the correct ABI. 
*/ > + TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default", > + TYPE_ATTRIBUTES (t)); > + } > + cgraph_simd_clone *sc = node->simdclone; > + > + for (unsigned i = 0; i < sc->nargs; ++i) > + { > + bool is_mask = false; > + tree type; > + switch (sc->args[i].arg_type) > + { > + case SIMD_CLONE_ARG_TYPE_MASK: > + is_mask = true; > + gcc_fallthrough (); > + case SIMD_CLONE_ARG_TYPE_VECTOR: > + case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: > + case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: > + type = sc->args[i].vector_type; > + gcc_assert (VECTOR_TYPE_P (type)); > + if (node->simdclone->vecsize_mangle == 's') > + type = simd_clone_adjust_sve_vector_type (type, is_mask, > + sc->simdlen); > + else if (is_mask) > + type = truth_type_for (type); > + sc->args[i].vector_type = type; > + break; > + default: > + continue; > + } > + } > + if (node->simdclone->vecsize_mangle == 's') > + { > + tree ret_type = TREE_TYPE (t); > + if (VECTOR_TYPE_P (ret_type)) > + TREE_TYPE (t) > + = simd_clone_adjust_sve_vector_type (ret_type, false, > + node->simdclone->simdlen); > + pop_function_decl (); > + } > } > > /* Implement TARGET_SIMD_CLONE_USABLE. */ > @@ -29459,6 +29566,11 @@ aarch64_simd_clone_usable (struct cgraph_node > *node, machine_mode vector_mode) > if (!TARGET_SIMD || aarch64_sve_mode_p (vector_mode)) > return -1; > return 0; > + case 's': > + if (!TARGET_SVE > + || !aarch64_sve_mode_p (vector_mode)) > + return -1; > + return 0; > default: > gcc_unreachable (); > } > diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc > index > 864586207ee89269b5a2cf136487440212d59695..4be25539057251a318409e > 576e4bc43fc5fd4c40 100644 > --- a/gcc/omp-simd-clone.cc > +++ b/gcc/omp-simd-clone.cc > @@ -541,9 +541,12 @@ simd_clone_mangle (struct cgraph_node *node, > pp_string (&pp, "_ZGV"); > pp_character (&pp, vecsize_mangle); > pp_character (&pp, mask); > - /* For now, simdlen is always constant, while variable simdlen pp 'n'. 
*/ > - unsigned int len = simdlen.to_constant (); > - pp_decimal_int (&pp, (len)); > + > + unsigned HOST_WIDE_INT len; > + if (simdlen.is_constant (&len)) > + pp_decimal_int (&pp, (int) (len)); > + else > + pp_character (&pp, 'x'); > > for (n = 0; n < clone_info->nargs; ++n) > { > @@ -1533,8 +1536,8 @@ simd_clone_adjust (struct cgraph_node *node) > below). */ > loop = alloc_loop (); > cfun->has_force_vectorize_loops = true; > - /* For now, simlen is always constant. */ > - loop->safelen = node->simdclone->simdlen.to_constant (); > + /* We can assert that safelen is the 'minimum' simdlen. */ > + loop->safelen = constant_lower_bound (node->simdclone->simdlen); > loop->force_vectorize = true; > loop->header = body_bb; > } > diff --git a/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C > b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C > new file mode 100644 > index > 0000000000000000000000000000000000000000..952b56dd87cc80ea7efadc > 63960157baac6abd63 > --- /dev/null > +++ b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C > @@ -0,0 +1,88 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O3 -march=armv8-a" } */ > + > +/* Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector > + function calls from scalar versions in accordance with the Vector > Function > + Application Binary Interface Specification for AArch64 (AAVPCS). > + > + We check for correctness in: > + - Vector function name mangling, with the grammar: > + > + vector name := prefix "_" name > + prefix := "_ZGV" isa mask <len> <parameters> > + > + Whereby: > + - <isa> := "s" for SVE > + - <mask> := "M" for Mask > + - <len> := "x" for VLA SVE > + > + resulting in: > + <prefix> := "_ZGVsMx" <parameters> > + > + with each vector parameter contributing a "v" to the prefix. > + > + - Parameter and return value mapping: > + - Unless marked with uniform or linear OpenMP clauses, parameters and > + return values are expected to map to vectors. 
> + - Where the lane-size of a parameter is less than the widest data size > + for a given function, the resulting vector should be unpacked and > + populated via the use of extending loads. > + > + - Finally, we also make sure we can correctly generate calls to the same > + function, differing only in the target architecture (i.e. SVE vs SIMD), > + ensuring that each call points to the correctly-mangled vector function > + and employs the correct ABI. For example, for `fn' we may expect: > + > + for #pragma GCC target("+sve"): _ZGVsMxvv_fn > + for #pragma GCC target("+simd"): _ZGVnN4vv_fn */ > + > +#pragma GCC target ("+sve") > +/* { dg-final { scan-assembler {\s+_ZGVsMxv__Z3fn0i\n} } } */ > +extern int __attribute__ ((simd, const)) fn0 (int); > +void test_fn0 (int *a, int *b, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] += fn0 (b[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn1si\n} } } */ > +extern int __attribute__ ((simd, const)) fn1 (short, int); > +void test_fn1 (int *a, int *b, short *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = fn1 (c[i], b[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn2si\n} } } */ > +extern short __attribute__ ((simd, const)) fn2 (short, int); > +void test_fn2 (short *a, int *b, short *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = fn2 (c[i], b[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn3ic\n} } } */ > +extern char __attribute__ ((simd, const)) fn3 (int, char); > +void test_fn3 (int *a, int *b, char *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = (int) (fn3 (b[i], c[i]) + c[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */ > +extern short __attribute__ ((simd, const)) fn4 (int, short); > +void test_fn4 (int *a, int *b, short *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = (int) (fn4 (b[i], c[i]) + c[i]); > +} > + > +#pragma GCC reset_options > +#pragma GCC target ("+simd") > +/* { dg-final { 
scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */ > +extern short __attribute__ ((simd, const)) fn4 (int, short); > +void test_fn5 (int *a, int *b, short *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = (int) (fn4 (b[i], c[i]) + c[i]); > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c > b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c > index > e2e80f0c663dcc182b8cc48b0453558e794f4085..2f4d3a866e55018b8ac8b483 > b8c33db862a57071 100644 > --- a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c > +++ b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c > @@ -43,6 +43,7 @@ float f04 (double a) > } > /* { dg-final { scan-assembler {_ZGVnN2v_f04:} } } */ > /* { dg-final { scan-assembler {_ZGVnM2v_f04:} } } */ > +/* { dg-final { scan-assembler-not {_ZGVs[0-9a-z]*_f04:} } } */ > > #pragma omp declare simd uniform(a) linear (b) > void f05 (short a, short *b, short c) > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c > b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..e2167648c8735df79973ac > 9fcbba0e966d61ee0a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c > @@ -0,0 +1,89 @@ > +/* { dg-do compile } */ > +/* { dg-options "-std=c99" } */ > +/* { dg-additional-options "-O3 -march=armv8-a" } */ > + > +/* Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector > + function calls from scalar versions in accordance with the Vector > Function > + Application Binary Interface Specification for AArch64 (AAVPCS). 
> + > + We check for correctness in: > + - Vector function name mangling, with the grammar: > + > + vector name := prefix "_" name > + prefix := "_ZGV" <isa> <mask> <len> <parameters> > + > + Whereby: > + - <isa> := "s" for SVE > + - <mask> := "M" for Mask > + - <len> := "x" for VLA SVE > + > + resulting in: > + <prefix> := "_ZGVsMx" <parameters> > + > + with each vector parameter contributing a "v" to the prefix. > + > + - Parameter and return value mapping: > + - Unless marked with uniform or linear OpenMP clauses, parameters and > + return values are expected to map to vectors. > + - Where the lane-size of a parameter is less than the widest data size > + for a given function, the resulting vector should be unpacked and > + populated via the use of extending loads. > + > + - Finally, we also make sure we can correctly generate calls to the same > + function, differing only in the target architecture (i.e. SVE vs SIMD), > + ensuring that each call points to the correctly-mangled vector function > + and employs the correct ABI. 
For example, for `fn' we may expect: > + > + for #pragma GCC target("+sve"): _ZGVsMxvv_fn > + for #pragma GCC target("+simd"): _ZGVnN4vv_fn */ > + > +#pragma GCC target ("+sve") > +/* { dg-final { scan-assembler {\s+_ZGVsMxv_fn0\n} } } */ > +extern int __attribute__ ((simd, const)) fn0 (int); > +void test_fn0 (int *a, int *b, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] += fn0 (b[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn1\n} } } */ > +extern int __attribute__ ((simd, const)) fn1 (short, int); > +void test_fn1 (int *a, int *b, short *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = fn1 (c[i], b[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn2\n} } } */ > +extern short __attribute__ ((simd, const)) fn2 (short, int); > +void test_fn2 (short *a, int *b, short *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = fn2 (c[i], b[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */ > +extern char __attribute__ ((simd, const)) fn3 (int, char); > +void test_fn3 (int *a, int *b, char *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = (int) (fn3 (b[i], c[i]) + c[i]); > +} > + > +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn4\n} } } */ > +extern short __attribute__ ((simd, const)) fn4 (int, short); > +void test_fn4 (int *a, int *b, short *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = (int) (fn4 (b[i], c[i]) + c[i]); > +} > + > +#pragma GCC reset_options > +#pragma GCC target ("+simd") > +/* { dg-final { scan-assembler {\s+_ZGVnN4vv_fn4\n} } } */ > +extern short __attribute__ ((simd, const)) fn4 (int, short); > +void test_fn5 (int *a, int *b, short *c, int n) > +{ > + for (int i = 0; i < n; ++i) > + a[i] = (int) (fn4 (b[i], c[i]) + c[i]); > +} > > > > > --