ping

> -----Original Message-----
> From: Tamar Christina <tamar.christ...@arm.com>
> Sent: Wednesday, December 4, 2024 12:17 PM
> To: gcc-patches@gcc.gnu.org
> Cc: nd <n...@arm.com>; Richard Earnshaw <richard.earns...@arm.com>;
> ktkac...@gcc.gnu.org; Richard Sandiford <richard.sandif...@arm.com>
> Subject: [PATCH 2/7]AArch64: Add SVE support for simd clones [PR96342]
> 
> Hi All,
> 
> This patch finalizes adding support for the generation of SVE simd clones when
> no simdlen is provided, following the ABI rules where the widest data type
> determines the minimum amount of elements in a length agnostic vector.
> 
> gcc/ChangeLog:
> 
>       PR target/96342
>       * config/aarch64/aarch64-protos.h (add_sve_type_attribute): Declare.
>       * config/aarch64/aarch64-sve-builtins.cc (add_sve_type_attribute): Make
>       visibility global and support use for non-ACLE types.
>       * config/aarch64/aarch64.cc
>       (aarch64_simd_clone_compute_vecsize_and_simdlen): Create VLA simd
> clone
>       when no simdlen is provided, according to ABI rules.
>       (simd_clone_adjust_sve_vector_type): New helper function.
>       (aarch64_simd_clone_adjust): Add '+sve' attribute to SVE simd clones
>       and modify types to use SVE types.
>       * omp-simd-clone.cc (simd_clone_mangle): Print 'x' for VLA simdlen.
>       (simd_clone_adjust): Adapt safelen check to be compatible with VLA
>       simdlen.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR target/96342
>       * gcc.target/aarch64/declare-simd-2.c: Add SVE clone scan.
>       * gcc.target/aarch64/vect-simd-clone-1.c: New test.
>       * g++.target/aarch64/vect-simd-clone-1.c: New test.
> 
> 
> Co-authored-by: Victor Do Nascimento <victor.donascime...@arm.com>
> Co-authored-by: Tamar Christina <tamar.christ...@arm.com>
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> ---
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-
> protos.h
> index
> c6ce62190bce43fae7b0c9d64202a7c042df6ef4..e7724e0518dd97a120edbc5f0
> 2b20298a57c653f 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -1138,6 +1138,8 @@ namespace aarch64_sve {
>  #ifdef GCC_TARGET_H
>    bool verify_type_context (location_t, type_context_kind, const_tree, bool);
>  #endif
> + void add_sve_type_attribute (tree, unsigned int, unsigned int,
> +                           const char *, const char *);
>  }
> 
>  extern void aarch64_split_combinev16qi (rtx operands[3]);
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index
> 0fec1cd439e729dca495aac4dea054a25ede20a7..e6c2bdeb00681848a838009c
> 833cfe3271a94049 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -998,14 +998,16 @@ static GTY(()) hash_map<tree, registered_function *>
> *overload_names[2];
>  /* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE 
> vectors
>     and NUM_PR SVE predicates.  MANGLED_NAME, if nonnull, is the ABI-defined
>     mangling of the type.  ACLE_NAME is the <arm_sve.h> name of the type.  */
> -static void
> +void
>  add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr,
>                       const char *mangled_name, const char *acle_name)
>  {
>    tree mangled_name_tree
>      = (mangled_name ? get_identifier (mangled_name) : NULL_TREE);
> +  tree acle_name_tree
> +    = (acle_name ? get_identifier (acle_name) : NULL_TREE);
> 
> -  tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE);
> +  tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE);
>    value = tree_cons (NULL_TREE, mangled_name_tree, value);
>    value = tree_cons (NULL_TREE, size_int (num_pr), value);
>    value = tree_cons (NULL_TREE, size_int (num_zr), value);
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index
> 4108c09715a5540db87ec4ba74a10804af78054a..af6fede102c2be6673c24f80
> 20d000ea56322997 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -29284,7 +29284,7 @@
> aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
>                                       int num, bool explicit_p)
>  {
>    tree t, ret_type;
> -  unsigned int nds_elt_bits;
> +  unsigned int nds_elt_bits, wds_elt_bits;
>    unsigned HOST_WIDE_INT const_simdlen;
> 
>    if (!TARGET_SIMD)
> @@ -29329,10 +29329,14 @@
> aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
>    if (TREE_CODE (ret_type) != VOID_TYPE)
>      {
>        nds_elt_bits = lane_size (SIMD_CLONE_ARG_TYPE_VECTOR, ret_type);
> +      wds_elt_bits = nds_elt_bits;
>        vec_elts.safe_push (std::make_pair (ret_type, nds_elt_bits));
>      }
>    else
> -    nds_elt_bits = POINTER_SIZE;
> +    {
> +      nds_elt_bits = POINTER_SIZE;
> +      wds_elt_bits = 0;
> +    }
> 
>    int i;
>    tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
> @@ -29340,44 +29344,65 @@
> aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
>    for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 
> 0;
>         t && t != void_list_node; t = TREE_CHAIN (t), i++)
>      {
> -      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
> +      tree type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
>        if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM
> -       && !supported_simd_type (arg_type))
> +       && !supported_simd_type (type))
>       {
>         if (!explicit_p)
>           ;
> -       else if (COMPLEX_FLOAT_TYPE_P (ret_type))
> +       else if (COMPLEX_FLOAT_TYPE_P (type))
>           warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
>                       "GCC does not currently support argument type %qT "
> -                     "for simd", arg_type);
> +                     "for simd", type);
>         else
>           warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
>                       "unsupported argument type %qT for simd",
> -                     arg_type);
> +                     type);
>         return 0;
>       }
> -      unsigned lane_bits = lane_size (clonei->args[i].arg_type, arg_type);
> +      unsigned lane_bits = lane_size (clonei->args[i].arg_type, type);
>        if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
> -     vec_elts.safe_push (std::make_pair (arg_type, lane_bits));
> +     vec_elts.safe_push (std::make_pair (type, lane_bits));
>        if (nds_elt_bits > lane_bits)
>       nds_elt_bits = lane_bits;
> +      if (wds_elt_bits < lane_bits)
> +     wds_elt_bits = lane_bits;
>      }
> 
> -  clonei->vecsize_mangle = 'n';
> +  /* If we could not determine the WDS type from available parameters/return,
> +     then fallback to using uintptr_t.  */
> +  if (wds_elt_bits == 0)
> +    wds_elt_bits = POINTER_SIZE;
> +
>    clonei->mask_mode = VOIDmode;
>    poly_uint64 simdlen;
> -  auto_vec<poly_uint64> simdlens (2);
> +  typedef struct
> +    {
> +      poly_uint64 len;
> +      char mangle;
> +    } aarch64_clone_info;
> +  auto_vec<aarch64_clone_info> clones (3);
> +
>    /* Keep track of the possible simdlens the clones of this function can 
> have,
>       and check them later to see if we support them.  */
>    if (known_eq (clonei->simdlen, 0U))
>      {
>        simdlen = exact_div (poly_uint64 (64), nds_elt_bits);
>        if (maybe_ne (simdlen, 1U))
> -     simdlens.safe_push (simdlen);
> -      simdlens.safe_push (simdlen * 2);
> +     clones.safe_push ({simdlen, 'n'});
> +      clones.safe_push ({simdlen * 2, 'n'});
> +      /* Only create an SVE simd clone if we aren't dealing with an 
> unprototyped
> +      function.
> +      We have also disabled support for creating SVE simdclones for functions
> +      with function bodies and any simdclones when -msve-vector-bits is used.
> +      TODO: add support for these.  */
> +      if (prototype_p (TREE_TYPE (node->decl))
> +       && !node->definition
> +       && !aarch64_sve_vg.is_constant ())
> +     clones.safe_push ({exact_div (BITS_PER_SVE_VECTOR, wds_elt_bits), 's'});
>      }
>    else
> -    simdlens.safe_push (clonei->simdlen);
> +    clones.safe_push ({clonei->simdlen, 'n'});
> 
>    clonei->vecsize_int = 0;
>    clonei->vecsize_float = 0;
> @@ -29391,11 +29416,12 @@
> aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
>       simdclone would cause a vector type to be larger than 128-bits, and 
> reject
>       such a clone.  */
>    unsigned j = 0;
> -  while (j < simdlens.length ())
> +  while (j < clones.length ())
>      {
>        bool remove_simdlen = false;
>        for (auto elt : vec_elts)
> -     if (known_gt (simdlens[j] * elt.second, 128U))
> +     if (clones[j].mangle == 'n'
> +         && known_gt (clones[j].len * elt.second, 128U))
>         {
>           /* Don't issue a warning for every simdclone when there is no
>              specific simdlen clause.  */
> @@ -29403,18 +29429,17 @@
> aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
>             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
>                         "GCC does not currently support simdlen %wd for "
>                         "type %qT",
> -                       constant_lower_bound (simdlens[j]), elt.first);
> +                       constant_lower_bound (clones[j].len), elt.first);
>           remove_simdlen = true;
>           break;
>         }
>        if (remove_simdlen)
> -     simdlens.ordered_remove (j);
> +     clones.ordered_remove (j);
>        else
>       j++;
>      }
> 
> -
> -  int count = simdlens.length ();
> +  int count = clones.length ();
>    if (count == 0)
>      {
>        if (explicit_p && known_eq (clonei->simdlen, 0U))
> @@ -29431,21 +29456,103 @@
> aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
>      }
> 
>    gcc_assert (num < count);
> -  clonei->simdlen = simdlens[num];
> +  clonei->simdlen = clones[num].len;
> +  clonei->vecsize_mangle = clones[num].mangle;
> +  /* SVE simdclones always have a Mask, so set inbranch to 1.  */
> +  if (clonei->vecsize_mangle == 's')
> +    clonei->inbranch = 1;
>    return count;
>  }
> 
> -/* Implement TARGET_SIMD_CLONE_ADJUST.  */
> +/* Helper function to adjust an SVE vector type of an SVE simd clone.  
> Returns
> +   an SVE vector type based on the element type of the vector TYPE, with 
> SIMDLEN
> +   number of elements.  If IS_MASK, returns an SVE mask type appropriate for 
> use
> +   with the SVE type it would otherwise return.  */
> 
> +static tree
> +simd_clone_adjust_sve_vector_type (tree type, bool is_mask, poly_uint64
> simdlen)
> +{
> +  unsigned int num_zr = 0;
> +  unsigned int num_pr = 0;
> +  machine_mode vector_mode;
> +  type = TREE_TYPE (type);
> +  scalar_mode scalar_m = SCALAR_TYPE_MODE (type);
> +  vector_mode = aarch64_sve_data_mode (scalar_m, simdlen).require ();
> +  type = build_vector_type_for_mode (type, vector_mode);
> +  if (is_mask)
> +    {
> +      type = truth_type_for (type);
> +      num_pr = 1;
> +    }
> +  else
> +    num_zr = 1;
> +
> +  /* We create new types here with the SVE type attribute instead of using 
> ACLE
> +     types as we need to support unpacked vectors which aren't available as
> +     ACLE SVE types.  */
> +  type = build_distinct_type_copy (type);
> +  aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
> +  return type;
> +}
> +
> +/* Implement TARGET_SIMD_CLONE_ADJUST.  */
>  static void
>  aarch64_simd_clone_adjust (struct cgraph_node *node)
>  {
> -  /* Add aarch64_vector_pcs target attribute to SIMD clones so they
> -     use the correct ABI.  */
> -
>    tree t = TREE_TYPE (node->decl);
> -  TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
> -                                     TYPE_ATTRIBUTES (t));
> +
> +  if (node->simdclone->vecsize_mangle == 's')
> +    {
> +      /* This is additive and has no effect if SVE, or a superset thereof, is
> +      already enabled.  */
> +      tree target = build_string (strlen ("+sve") + 1, "+sve");
> +      if (!aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 
> 0))
> +     gcc_unreachable ();
> +      push_function_decl (node->decl);
> +    }
> +  else
> +    {
> +     /* Add aarch64_vector_pcs target attribute to SIMD clones so they
> +        use the correct ABI.  */
> +     TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
> +                                           TYPE_ATTRIBUTES (t));
> +    }
> +  cgraph_simd_clone *sc = node->simdclone;
> +
> +  for (unsigned i = 0; i < sc->nargs; ++i)
> +    {
> +      bool is_mask = false;
> +      tree type;
> +      switch (sc->args[i].arg_type)
> +     {
> +     case SIMD_CLONE_ARG_TYPE_MASK:
> +       is_mask = true;
> +       gcc_fallthrough ();
> +     case SIMD_CLONE_ARG_TYPE_VECTOR:
> +     case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
> +     case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
> +       type = sc->args[i].vector_type;
> +       gcc_assert (VECTOR_TYPE_P (type));
> +       if (node->simdclone->vecsize_mangle == 's')
> +         type = simd_clone_adjust_sve_vector_type (type, is_mask,
> +                                                   sc->simdlen);
> +       else if (is_mask)
> +         type = truth_type_for (type);
> +       sc->args[i].vector_type = type;
> +       break;
> +     default:
> +       continue;
> +     }
> +    }
> +  if (node->simdclone->vecsize_mangle == 's')
> +    {
> +      tree ret_type = TREE_TYPE (t);
> +      if (VECTOR_TYPE_P (ret_type))
> +     TREE_TYPE (t)
> +       = simd_clone_adjust_sve_vector_type (ret_type, false,
> +                                            node->simdclone->simdlen);
> +      pop_function_decl ();
> +    }
>  }
> 
>  /* Implement TARGET_SIMD_CLONE_USABLE.  */
> @@ -29459,6 +29566,11 @@ aarch64_simd_clone_usable (struct cgraph_node
> *node, machine_mode vector_mode)
>        if (!TARGET_SIMD || aarch64_sve_mode_p (vector_mode))
>       return -1;
>        return 0;
> +    case 's':
> +      if (!TARGET_SVE
> +       || !aarch64_sve_mode_p (vector_mode))
> +     return -1;
> +      return 0;
>      default:
>        gcc_unreachable ();
>      }
> diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc
> index
> 864586207ee89269b5a2cf136487440212d59695..4be25539057251a318409e
> 576e4bc43fc5fd4c40 100644
> --- a/gcc/omp-simd-clone.cc
> +++ b/gcc/omp-simd-clone.cc
> @@ -541,9 +541,12 @@ simd_clone_mangle (struct cgraph_node *node,
>    pp_string (&pp, "_ZGV");
>    pp_character (&pp, vecsize_mangle);
>    pp_character (&pp, mask);
> -  /* For now, simdlen is always constant, while variable simdlen pp 'n'.  */
> -  unsigned int len = simdlen.to_constant ();
> -  pp_decimal_int (&pp, (len));
> +
> +  unsigned HOST_WIDE_INT len;
> +  if (simdlen.is_constant (&len))
> +    pp_decimal_int (&pp, (int) (len));
> +  else
> +    pp_character (&pp, 'x');
> 
>    for (n = 0; n < clone_info->nargs; ++n)
>      {
> @@ -1533,8 +1536,8 @@ simd_clone_adjust (struct cgraph_node *node)
>        below).  */
>        loop = alloc_loop ();
>        cfun->has_force_vectorize_loops = true;
> -      /* For now, simlen is always constant.  */
> -      loop->safelen = node->simdclone->simdlen.to_constant ();
> +      /* We can assert that safelen is the 'minimum' simdlen.  */
> +      loop->safelen = constant_lower_bound (node->simdclone->simdlen);
>        loop->force_vectorize = true;
>        loop->header = body_bb;
>      }
> diff --git a/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
> b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..952b56dd87cc80ea7efadc
> 63960157baac6abd63
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
> @@ -0,0 +1,88 @@
> +/* { dg-do compile }  */
> +/* { dg-additional-options "-O3 -march=armv8-a" } */
> +
> +/*  Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
> +    function calls from scalar versions in accordance with the Vector 
> Function
> +    Application Binary Interface Specification for AArch64 (AAVPCS).
> +
> +  We check for correctness in:
> +    - Vector function name mangling, with the grammar:
> +
> +      vector name := prefix  "_" name
> +      prefix := "_ZGV" isa mask <len> <parameters>
> +
> +      Whereby:
> +      - <isa>  := "s" for SVE
> +      - <mask> := "M" for Mask
> +      - <len>  := "x" for VLA SVE
> +
> +      resulting in:
> +      <prefix> := "_ZGVsMx" <parameters>
> +
> +      with each vector parameter contributing a "v" to the prefix.
> +
> +    - Parameter and return value mapping:
> +      - Unless marked with uniform or linear OpenMP clauses, parameters and
> +      return values are expected to map to vectors.
> +      - Where the lane-size of a parameter is less than the widest data size
> +      for a given function, the resulting vector should be unpacked and
> +      populated via use of extending loads.
> +
> +    - Finally, we also make sure we can correctly generate calls to the same
> +      function, differing only in the target architecture (i.e. SVE vs SIMD),
> +      ensuring that each call points to the correctly-mangled vector function
> +      and employs the correct ABI.  For example, for `fn' we may expect:
> +
> +     for #pragma GCC target("+sve"): _ZGVsMxvv_fn
> +     for #pragma GCC target("+simd"): _ZGVnN4vv_fn */
> +
> +#pragma GCC target ("+sve")
> +/* { dg-final { scan-assembler {\s+_ZGVsMxv__Z3fn0i\n} } } */
> +extern int __attribute__ ((simd, const)) fn0 (int);
> +void test_fn0 (int *a, int *b, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] += fn0 (b[i]);
> +}
> +
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn1si\n} } } */
> +extern int __attribute__ ((simd, const)) fn1 (short, int);
> +void test_fn1 (int *a, int *b, short *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = fn1 (c[i], b[i]);
> +}
> +
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn2si\n} } } */
> +extern short __attribute__ ((simd, const)) fn2 (short, int);
> +void test_fn2 (short *a, int *b, short *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = fn2 (c[i], b[i]);
> +}
> +
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn3ic\n} } } */
> +extern char __attribute__ ((simd, const)) fn3 (int, char);
> +void test_fn3 (int *a, int *b, char *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
> +}
> +
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv__Z3fn4is\n} } } */
> +extern short __attribute__ ((simd, const)) fn4 (int, short);
> +void test_fn4 (int *a, int *b, short *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
> +}
> +
> +#pragma GCC reset_options
> +#pragma GCC target ("+simd")
> +/* { dg-final { scan-assembler {\s+_ZGVnN4vv__Z3fn4is\n} } } */
> +extern short __attribute__ ((simd, const)) fn4 (int, short);
> +void test_fn5 (int *a, int *b, short *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
> b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
> index
> e2e80f0c663dcc182b8cc48b0453558e794f4085..2f4d3a866e55018b8ac8b483
> b8c33db862a57071 100644
> --- a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c
> @@ -43,6 +43,7 @@ float f04 (double a)
>  }
>  /* { dg-final { scan-assembler {_ZGVnN2v_f04:} } } */
>  /* { dg-final { scan-assembler {_ZGVnM2v_f04:} } } */
> +/* { dg-final { scan-assembler-not {_ZGVs[0-9a-z]*_f04:} } } */
> 
>  #pragma omp declare simd uniform(a) linear (b)
>  void f05 (short a, short *b, short c)
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c
> b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..e2167648c8735df79973ac
> 9fcbba0e966d61ee0a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-1.c
> @@ -0,0 +1,89 @@
> +/* { dg-do compile }  */
> +/* { dg-options "-std=c99" } */
> +/* { dg-additional-options "-O3 -march=armv8-a" } */
> +
> +/*  Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
> +    function calls from scalar versions in accordance with the Vector 
> Function
> +    Application Binary Interface Specification for AArch64 (AAVPCS).
> +
> +  We check for correctness in:
> +    - Vector function name mangling, with the grammar:
> +
> +      vector name := prefix  "_" name
> +      prefix := "_ZGV" isa mask <len> <parameters>
> +
> +      Whereby:
> +      - <isa>  := "s" for SVE
> +      - <mask> := "M" for Mask
> +      - <len>  := "x" for VLA SVE
> +
> +      resulting in:
> +      <prefix> := "_ZGVsMx" <parameters>
> +
> +      with each vector parameter contributing a "v" to the prefix.
> +
> +    - Parameter and return value mapping:
> +      - Unless marked with uniform or linear OpenMP clauses, parameters and
> +      return values are expected to map to vectors.
> +      - Where the lane-size of a parameter is less than the widest data size
> +      for a given function, the resulting vector should be unpacked and
> +      populated via use of extending loads.
> +
> +    - Finally, we also make sure we can correctly generate calls to the same
> +      function, differing only in the target architecture (i.e. SVE vs SIMD),
> +      ensuring that each call points to the correctly-mangled vector function
> +      and employs the correct ABI.  For example, for `fn' we may expect:
> +
> +     for #pragma GCC target("+sve"): _ZGVsMxvv_fn
> +     for #pragma GCC target("+simd"): _ZGVnN4vv_fn */
> +
> +#pragma GCC target ("+sve")
> +/* { dg-final { scan-assembler {\s+_ZGVsMxv_fn0\n} } } */
> +extern int __attribute__ ((simd, const)) fn0 (int);
> +void test_fn0 (int *a, int *b, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] += fn0 (b[i]);
> +}
> +
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn1\n} } } */
> +extern int __attribute__ ((simd, const)) fn1 (short, int);
> +void test_fn1 (int *a, int *b, short *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = fn1 (c[i], b[i]);
> +}
> +
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn2\n} } } */
> +extern short __attribute__ ((simd, const)) fn2 (short, int);
> +void test_fn2 (short *a, int *b, short *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = fn2 (c[i], b[i]);
> +}
> +
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */
> +extern char __attribute__ ((simd, const)) fn3 (int, char);
> +void test_fn3 (int *a, int *b, char *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
> +}
> +
> +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn4\n} } } */
> +extern short __attribute__ ((simd, const)) fn4 (int, short);
> +void test_fn4 (int *a, int *b, short *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
> +}
> +
> +#pragma GCC reset_options
> +#pragma GCC target ("+simd")
> +/* { dg-final { scan-assembler {\s+_ZGVnN4vv_fn4\n} } } */
> +extern short __attribute__ ((simd, const)) fn4 (int, short);
> +void test_fn5 (int *a, int *b, short *c, int n)
> +{
> +  for (int i = 0; i < n; ++i)
> +    a[i] = (int) (fn4 (b[i], c[i]) + c[i]);
> +}
> 
> 
> 
> 
> --

Reply via email to