> > +    }
> > +  cgraph_simd_clone *sc = node->simdclone;
> > +
> > +  for (unsigned i = 0; i < sc->nargs; ++i)
> > +    {
> > +      bool is_mask = false;
> > +      tree type;
> > +      switch (sc->args[i].arg_type)
> > +        {
> > +        case SIMD_CLONE_ARG_TYPE_MASK:
> > +          is_mask = true;
> > +          gcc_fallthrough ();
> > +        case SIMD_CLONE_ARG_TYPE_VECTOR:
> > +        case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
> > +        case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
> > +          type = sc->args[i].vector_type;
> > +          gcc_assert (VECTOR_TYPE_P (type));
> > +          if (node->simdclone->vecsize_mangle == 's')
> > +            type = simd_clone_adjust_sve_vector_type (type, is_mask,
> > +                                                      sc->simdlen);
> > +          else if (is_mask)
> > +            type = truth_type_for (type);
>
> Sorry, I have a horrible feeling I knew this once and have forgotten,
> but: why do we need to do this for non-SVE, when we didn't before?
>
I don't think we do either.  For Adv. SIMD the truth type is the same as
the vector type anyway, so this is a no-op.  Removed.

> I should have noticed this last time, sorry, but we don't seem to have
> any coverage for the linear cases above.  Maybe that comes in a later
> patch though.
>

No, though I did try to make some examples of linear cases.  In C the
vectorizer just ignores the pragma.  In C++ with a linear reference we
fail to vectorize because we again hit the mismatch between safelen
being an int and the VF being a poly_int, and so we bail out.  I did
manage to create a testcase that generates an ICE, but that is due to an
existing bug in the vectorizer with how it registers masks.  Since that
is an existing bug, I'm hoping it's not a blocker for this series.
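
For reference, the C++ linear-reference cases I was trying looked
roughly like the following (an illustrative sketch only; `fn' and the
loop are made up for the example and are not part of this patch, and it
assumes -O3 -fopenmp-simd so the pragmas take effect):

  /* `linear(ref(b))' is the C++-only reference-linear modifier;
     vectorizing the loop below currently bails out as described.  */
  #pragma omp declare simd linear(ref(b))
  int fn (int a, int &b);

  void test (int *a, int *b, int n)
  {
    #pragma omp simd
    for (int i = 0; i < n; ++i)
      a[i] = fn (a[i], b[i]);
  }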

gcc/ChangeLog:

	PR target/96342
	* config/aarch64/aarch64-protos.h (add_sve_type_attribute): Declare.
	* config/aarch64/aarch64-sve-builtins.cc (add_sve_type_attribute):
	Make visibility global and support use for non-ACLE types.
	* config/aarch64/aarch64.cc
	(aarch64_simd_clone_compute_vecsize_and_simdlen): Create VLA simd
	clone when no simdlen is provided, according to ABI rules.
	(simd_clone_adjust_sve_vector_type): New helper function.
	(aarch64_simd_clone_adjust): Add '+sve' attribute to SVE simd clones
	and modify types to use SVE types.
	* omp-simd-clone.cc (simd_clone_mangle): Print 'x' for VLA simdlen.
	(simd_clone_adjust): Adapt safelen check to be compatible with VLA
	simdlen.

gcc/testsuite/ChangeLog:

	PR target/96342
	* gcc.target/aarch64/declare-simd-2.c: Add SVE clone scan.
	* gcc.target/aarch64/vect-simd-clone-1.c: New test.
	* g++.target/aarch64/vect-simd-clone-1.c: New test.

Co-authored-by: Victor Do Nascimento <victor.donascime...@arm.com>
Co-authored-by: Tamar Christina <tamar.christ...@arm.com>

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu (-m32 and -m64) with
no issues.

Ok for master?

Thanks,
Tamar

-- inline copy of patch --

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index bd17486e9128a21bd205ef1fb3ec3e323408ec59..7ab1316cf56850678d93b6fdb8d19eea18ad78f1 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1151,6 +1151,8 @@ namespace aarch64_sve {
 #ifdef GCC_TARGET_H
   bool verify_type_context (location_t, type_context_kind, const_tree, bool);
 #endif
+  void add_sve_type_attribute (tree, unsigned int, unsigned int,
+                               const char *, const char *);
 }

 extern void aarch64_split_combinev16qi (rtx operands[3]);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 5acc56f99c65498cbf5593a9ee21540fa55098c2..e93c3a78e6d6c909f5de32ba8672503fc42b8d1c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1032,15 +1032,18 @@ static GTY(()) hash_map<tree, registered_function *> *overload_names[2];

 /* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE vectors
    and NUM_PR SVE predicates.  MANGLED_NAME, if nonnull, is the ABI-defined
-   mangling of the type.  ACLE_NAME is the <arm_sve.h> name of the type.  */
-static void
+   mangling of the type.  ACLE_NAME is the <arm_sve.h> name of the type,
+   or null if <arm_sve.h> does not provide the type.  */
+void
 add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr,
                         const char *mangled_name, const char *acle_name)
 {
   tree mangled_name_tree
     = (mangled_name ? get_identifier (mangled_name) : NULL_TREE);
+  tree acle_name_tree
+    = (acle_name ? get_identifier (acle_name) : NULL_TREE);

-  tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE);
+  tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE);
   value = tree_cons (NULL_TREE, mangled_name_tree, value);
   value = tree_cons (NULL_TREE, size_int (num_pr), value);
   value = tree_cons (NULL_TREE, size_int (num_zr), value);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 77a2a6bfa3a3a6cd678ceb820d310f44cacfe581..de4c0a0783912b54ac35d7c818c24574b27a4ca0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -29323,7 +29323,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
                                                 int num, bool explicit_p)
 {
   tree t, ret_type;
-  unsigned int nds_elt_bits;
+  unsigned int nds_elt_bits, wds_elt_bits;
   unsigned HOST_WIDE_INT const_simdlen;

   if (!TARGET_SIMD)
@@ -29368,10 +29368,14 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
   if (TREE_CODE (ret_type) != VOID_TYPE)
     {
       nds_elt_bits = lane_size (SIMD_CLONE_ARG_TYPE_VECTOR, ret_type);
+      wds_elt_bits = nds_elt_bits;
       vec_elts.safe_push (std::make_pair (ret_type, nds_elt_bits));
     }
   else
-    nds_elt_bits = POINTER_SIZE;
+    {
+      nds_elt_bits = POINTER_SIZE;
+      wds_elt_bits = 0;
+    }

   int i;
   tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
@@ -29379,44 +29383,65 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
   for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
        t && t != void_list_node; t = TREE_CHAIN (t), i++)
     {
-      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
+      tree type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
       if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
-          && !supported_simd_type (arg_type))
+          && !supported_simd_type (type))
         {
           if (!explicit_p)
             ;
-          else if (COMPLEX_FLOAT_TYPE_P (ret_type))
+          else if (COMPLEX_FLOAT_TYPE_P (type))
             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                         "GCC does not currently support argument type %qT "
-                        "for simd", arg_type);
+                        "for simd", type);
           else
             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                         "unsupported argument type %qT for simd",
-                        arg_type);
+                        type);
           return 0;
         }
-      unsigned lane_bits = lane_size (clonei->args[i].arg_type, arg_type);
+      unsigned lane_bits = lane_size (clonei->args[i].arg_type, type);
       if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
-        vec_elts.safe_push (std::make_pair (arg_type, lane_bits));
+        vec_elts.safe_push (std::make_pair (type, lane_bits));
       if (nds_elt_bits > lane_bits)
         nds_elt_bits = lane_bits;
+      if (wds_elt_bits < lane_bits)
+        wds_elt_bits = lane_bits;
     }

-  clonei->vecsize_mangle = 'n';
+  /* If we could not determine the WDS type from available parameters/return,
+     then fallback to using uintptr_t.  */
+  if (wds_elt_bits == 0)
+    wds_elt_bits = POINTER_SIZE;
+
   clonei->mask_mode = VOIDmode;
   poly_uint64 simdlen;
-  auto_vec<poly_uint64> simdlens (2);
+  typedef struct
+    {
+      poly_uint64 len;
+      char mangle;
+    } aarch64_clone_info;
+  auto_vec<aarch64_clone_info, 3> clones;
+
   /* Keep track of the possible simdlens the clones of this function
      can have, and check them later to see if we support them.  */
   if (known_eq (clonei->simdlen, 0U))
     {
       simdlen = exact_div (poly_uint64 (64), nds_elt_bits);
       if (maybe_ne (simdlen, 1U))
-        simdlens.safe_push (simdlen);
-      simdlens.safe_push (simdlen * 2);
+        clones.safe_push ({simdlen, 'n'});
+      clones.safe_push ({simdlen * 2, 'n'});
+
+      /* Only create an SVE simd clone if we aren't dealing with an
+         unprototyped function.
+         We have also disabled support for creating SVE simdclones for
+         functions with function bodies and any simdclones when
+         -msve-vector-bits is used.  TODO: add support for these.  */
+      if (prototype_p (TREE_TYPE (node->decl))
+          && !node->definition
+          && !aarch64_sve_vg.is_constant ())
+        clones.safe_push ({exact_div (BITS_PER_SVE_VECTOR, wds_elt_bits),
+                           's'});
     }
   else
-    simdlens.safe_push (clonei->simdlen);
+    clones.safe_push ({clonei->simdlen, 'n'});

   clonei->vecsize_int = 0;
   clonei->vecsize_float = 0;
@@ -29430,11 +29455,12 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
      simdclone would cause a vector type to be larger than 128-bits, and reject
      such a clone.  */
   unsigned j = 0;
-  while (j < simdlens.length ())
+  while (j < clones.length ())
     {
       bool remove_simdlen = false;
       for (auto elt : vec_elts)
-        if (known_gt (simdlens[j] * elt.second, 128U))
+        if (clones[j].mangle == 'n'
+            && known_gt (clones[j].len * elt.second, 128U))
          {
            /* Don't issue a warning for every simdclone when there is no
               specific simdlen clause.  */
@@ -29442,18 +29468,17 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                        "GCC does not currently support simdlen %wd for "
                        "type %qT",
-                       constant_lower_bound (simdlens[j]), elt.first);
+                       constant_lower_bound (clones[j].len), elt.first);
            remove_simdlen = true;
            break;
          }
       if (remove_simdlen)
-        simdlens.ordered_remove (j);
+        clones.ordered_remove (j);
       else
        j++;
     }
-
-  int count = simdlens.length ();
+  int count = clones.length ();

   if (count == 0)
     {
       if (explicit_p && known_eq (clonei->simdlen, 0U))
@@ -29470,21 +29495,112 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
     }

   gcc_assert (num < count);
-  clonei->simdlen = simdlens[num];
+  clonei->simdlen = clones[num].len;
+  clonei->vecsize_mangle = clones[num].mangle;
+  /* SVE simdclones always have a Mask, so set inbranch to 1.  */
+  if (clonei->vecsize_mangle == 's')
+    clonei->inbranch = 1;

   return count;
 }

-/* Implement TARGET_SIMD_CLONE_ADJUST.  */
+/* Helper function to adjust an SVE vector type of an SVE simd clone.  Returns
+   an SVE vector type based on the element type of the vector TYPE, with
+   SIMDLEN number of elements.  If IS_MASK, returns an SVE mask type
+   appropriate for use with the SVE type it would otherwise return.  */
+
+static tree
+simd_clone_adjust_sve_vector_type (tree type, bool is_mask,
+                                   poly_uint64 simdlen)
+{
+  unsigned int num_zr = 0;
+  unsigned int num_pr = 0;
+  machine_mode vector_mode;
+  type = TREE_TYPE (type);
+  scalar_mode scalar_m = SCALAR_TYPE_MODE (type);
+  vector_mode = aarch64_sve_data_mode (scalar_m, simdlen).require ();
+  type = build_vector_type_for_mode (type, vector_mode);
+  if (is_mask)
+    {
+      type = truth_type_for (type);
+      num_pr = 1;
+    }
+  else
+    num_zr = 1;
+
+  /* We create new types here with the SVE type attribute instead of using ACLE
+     types as we need to support unpacked vectors which aren't available as
+     ACLE SVE types.  */
+
+  /* ??? This creates anonymous "SVE type" attributes for all types,
+     even those that correspond to <arm_sve.h> types.  This affects type
+     compatibility in C/C++, but not in gimple.  (Gimple type equivalence
+     is instead decided by TARGET_COMPATIBLE_VECTOR_TYPES_P.)
+     Thus a C/C++ definition of the implementation function will have a
+     different function type from the declaration that this code creates.
+     However, it doesn't seem worth trying to fix that until we have a
+     way of handling implementations that operate on unpacked types.  */
+  type = build_distinct_type_copy (type);
+  aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
+  return type;
+}
+
+/* Implement TARGET_SIMD_CLONE_ADJUST.  */

 static void
 aarch64_simd_clone_adjust (struct cgraph_node *node)
 {
-  /* Add aarch64_vector_pcs target attribute to SIMD clones so they
-     use the correct ABI.  */
   tree t = TREE_TYPE (node->decl);
-  TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
-                                        TYPE_ATTRIBUTES (t));
+
+  if (node->simdclone->vecsize_mangle == 's')
+    {
+      /* This is additive and has no effect if SVE, or a superset thereof, is
+         already enabled.  */
+      tree target = build_string (strlen ("+sve") + 1, "+sve");
+      if (!aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 0))
+        gcc_unreachable ();
+      push_function_decl (node->decl);
+    }
+  else
+    {
+      /* Add aarch64_vector_pcs target attribute to SIMD clones so they
+         use the correct ABI.  */
+      TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
+                                            TYPE_ATTRIBUTES (t));
+    }
+
+  cgraph_simd_clone *sc = node->simdclone;
+
+  for (unsigned i = 0; i < sc->nargs; ++i)
+    {
+      bool is_mask = false;
+      tree type;
+      switch (sc->args[i].arg_type)
+        {
+        case SIMD_CLONE_ARG_TYPE_MASK:
+          is_mask = true;
+          gcc_fallthrough ();
+        case SIMD_CLONE_ARG_TYPE_VECTOR:
+        case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
+        case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
+          type = sc->args[i].vector_type;
+          gcc_assert (VECTOR_TYPE_P (type));
+          if (node->simdclone->vecsize_mangle == 's')
+            type = simd_clone_adjust_sve_vector_type (type, is_mask,
+                                                      sc->simdlen);
+          sc->args[i].vector_type = type;
+          break;
+        default:
+          continue;
+        }
+    }
+
+  if (node->simdclone->vecsize_mangle == 's')
+    {
+      tree ret_type = TREE_TYPE (t);
+      if (VECTOR_TYPE_P (ret_type))
+        TREE_TYPE (t)
+          = simd_clone_adjust_sve_vector_type (ret_type, false,
+                                               node->simdclone->simdlen);
+      pop_function_decl ();
+    }
 }

 /* Implement TARGET_SIMD_CLONE_USABLE.  */
@@ -29498,6 +29614,11 @@ aarch64_simd_clone_usable (struct cgraph_node *node, machine_mode vector_mode)
       if (!TARGET_SIMD
          || aarch64_sve_mode_p (vector_mode))
        return -1;
       return 0;
+    case 's':
+      if (!TARGET_SVE
+          || !aarch64_sve_mode_p (vector_mode))
+        return -1;
+      return 0;
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc
index 864586207ee89269b5a2cf136487440212d59695..4be25539057251a318409e576e4bc43fc5fd4c40 100644
--- a/gcc/omp-simd-clone.cc
+++ b/gcc/omp-simd-clone.cc
@@ -541,9 +541,12 @@ simd_clone_mangle (struct cgraph_node *node,
   pp_string (&pp, "_ZGV");
   pp_character (&pp, vecsize_mangle);
   pp_character (&pp, mask);
-  /* For now, simdlen is always constant, while variable simdlen pp 'n'.  */
-  unsigned int len = simdlen.to_constant ();
-  pp_decimal_int (&pp, (len));
+
+  unsigned HOST_WIDE_INT len;
+  if (simdlen.is_constant (&len))
+    pp_decimal_int (&pp, (int) (len));
+  else
+    pp_character (&pp, 'x');

   for (n = 0; n < clone_info->nargs; ++n)
     {
@@ -1533,8 +1536,8 @@ simd_clone_adjust (struct cgraph_node *node)
         below).  */
       loop = alloc_loop ();
       cfun->has_force_vectorize_loops = true;
-      /* For now, simlen is always constant.  */
-      loop->safelen = node->simdclone->simdlen.to_constant ();
+      /* We can assert that safelen is the 'minimum' simdlen.  */
+      loop->safelen = constant_lower_bound (node->simdclone->simdlen);
       loop->force_vectorize = true;
       loop->header = body_bb;
     }
diff --git a/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
new file mode 100644
index 0000000000000000000000000000000000000000..90febeca16e5126ed86f2f472b66c3bc3533c773
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
@@ -0,0 +1,88 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+/* Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
+   function calls from scalar versions in accordance with the Vector Function
+   Application Binary Interface Specification for AArch64 (AAVPCS).
+
+   We check for correctness in:
+   - Vector function name mangling, with the grammar:
+
+       vector name := prefix "_" name
+       prefix := "_ZGV" isa mask <len> <parameters>
+
+     Whereby:
+     - <isa> := "s" for SVE
+     - <mask> := "M" for Mask
+     - <len> := "x" for VLA SVE
+
+     resulting in:
+       <prefix> := "_ZGVsMx" <parameters>
+
+     with each vector parameter contributing a "v" to the prefix.
+
+   - Parameter and return value mapping:
+     - Unless marked with uniform or linear OpenMP clauses, parameters and
+       return values are expected to map to vectors.
+     - Where the lane-size of a parameter is less than the widest data size
+       for a given function, the resulting vector should be unpacked and
+       populated via extending loads.
+
+   - Finally, we also make sure we can correctly generate calls to the same
+     function, differing only in the target architecture (i.e. SVE vs SIMD),
+     ensuring that each call points to the correctly-mangled vector function
+     and employs the correct ABI.  For example, for `fn' we may expect:
+
+       for #pragma GCC target("+sve"):  _ZGVsMxvv_fn
+       for #pragma GCC target("+simd"): _ZGVnN4vv_fn  */
+
+#pragma GCC target ("+sve")
+/* { dg-final { scan-assembler {\s+_ZGVsMxv__Z3fn0i\n} } } */
+extern int __attribute__ ((simd, const)) fn0 (int);
+void test_fn0 (int *a, int *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] += fn0 (b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn1si\n} } } */
+extern int __attribute__ ((simd, const)) fn1 (short, int);
+void test_fn1 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn1 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn2si\n} } } */
+extern short __attribute__ ((simd, const)) fn2 (short, int);
+void test_fn2 (short *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn2 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn3ic\n} } } */
+extern char __attribute__ ((simd, const)) fn3 (int, char);
+void test_fn3 (int *a, int *b, char *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn4 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
+
+#pragma GCC reset_options
+#pragma GCC target ("+simd")
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn5 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
index e2e80f0c663dcc182b8cc48b0453558e794f4085..2f4d3a866e55018b8ac8b483b8c33db862a57071 100644
--- a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
@@ -43,6 +43,7 @@ float f04 (double a)
 }
 /* { dg-final { scan-assembler {_ZGVnN2v_f04:} } } */
 /* { dg-final { scan-assembler {_ZGVnM2v_f04:} } } */
+/* { dg-final { scan-assembler-not {_ZGVs[0-9a-z]*_f04:} } } */

 #pragma omp declare simd uniform(a) linear (b)
 void f05 (short a, short *b, short c)
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0d8f497644ca119529a5778b81ae8a78948306e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c
@@ -0,0 +1,89 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99" } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+/* Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
+   function calls from scalar versions in accordance with the Vector Function
+   Application Binary Interface Specification for AArch64 (AAVPCS).
+
+   We check for correctness in:
+   - Vector function name mangling, with the grammar:
+
+       vector name := prefix "_" name
+       prefix := "_ZGV" isa mask <len> <parameters>
+
+     Whereby:
+     - <isa> := "s" for SVE
+     - <mask> := "M" for Mask
+     - <len> := "x" for VLA SVE
+
+     resulting in:
+       <prefix> := "_ZGVsMx" <parameters>
+
+     with each vector parameter contributing a "v" to the prefix.
+
+   - Parameter and return value mapping:
+     - Unless marked with uniform or linear OpenMP clauses, parameters and
+       return values are expected to map to vectors.
+     - Where the lane-size of a parameter is less than the widest data size
+       for a given function, the resulting vector should be unpacked and
+       populated via extending loads.
+
+   - Finally, we also make sure we can correctly generate calls to the same
+     function, differing only in the target architecture (i.e. SVE vs SIMD),
+     ensuring that each call points to the correctly-mangled vector function
+     and employs the correct ABI.  For example, for `fn' we may expect:
+
+       for #pragma GCC target("+sve"):  _ZGVsMxvv_fn
+       for #pragma GCC target("+simd"): _ZGVnN4vv_fn  */
+
+#pragma GCC target ("+sve")
+/* { dg-final { scan-assembler {\s+_ZGVsMxv_fn0\n} } } */
+extern int __attribute__ ((simd, const)) fn0 (int);
+void test_fn0 (int *a, int *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] += fn0 (b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn1\n} } } */
+extern int __attribute__ ((simd, const)) fn1 (short, int);
+void test_fn1 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn1 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn2\n} } } */
+extern short __attribute__ ((simd, const)) fn2 (short, int);
+void test_fn2 (short *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn2 (c[i], b[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */
+extern char __attribute__ ((simd, const)) fn3 (int, char);
+void test_fn3 (int *a, int *b, char *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn4\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn4 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
+
+#pragma GCC reset_options
+#pragma GCC target ("+simd")
+/* { dg-final { scan-assembler {\s+_ZGVnN4vv_fn4\n} } } */
+extern short __attribute__ ((simd, const)) fn4 (int, short);
+void test_fn5 (int *a, int *b, short *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
+}
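
To make the type rewriting concrete: conceptually, the masked VLA clone
the patch creates for the first test function above has a signature
along the following lines.  This is a sketch for illustration only; it
uses the <arm_sve.h> typedefs for readability, whereas the patch builds
distinct copies of the equivalent types carrying the "SVE type"
attribute, precisely so it can also represent unpacked vectors that
have no ACLE typedef:

  #include <arm_sve.h>

  /* Scalar original:  int fn0 (int);
     Masked VLA SVE clone: simdlen is VL/32 elements, hence the 'x'
     length marker in the mangled name, with the governing predicate
     passed as a trailing svbool_t per the vector function ABI.  */
  svint32_t _ZGVsMxv_fn0 (svint32_t x, svbool_t mask);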