Tamar Christina <tamar.christ...@arm.com> writes:
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index 
> 0fec1cd439e729dca495aac4dea054a25ede20a7..e6c2bdeb00681848a838009c833cfe3271a94049
>  100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -998,14 +998,16 @@ static GTY(()) hash_map<tree, registered_function *> 
> *overload_names[2];
>  /* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE 
> vectors
>     and NUM_PR SVE predicates.  MANGLED_NAME, if nonnull, is the ABI-defined
>     mangling of the type.  ACLE_NAME is the <arm_sve.h> name of the type.  */

How about:

    mangling of the type.  ACLE_NAME is the <arm_sve.h> name of the type,
    or null if <arm_sve.h> does not provide the type.  */

> -static void
> +void
>  add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr,
>                       const char *mangled_name, const char *acle_name)
>  {
>    tree mangled_name_tree
>      = (mangled_name ? get_identifier (mangled_name) : NULL_TREE);
> +  tree acle_name_tree
> +    = (acle_name ? get_identifier (acle_name) : NULL_TREE);
>  
> -  tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE);
> +  tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE);
>    value = tree_cons (NULL_TREE, mangled_name_tree, value);
>    value = tree_cons (NULL_TREE, size_int (num_pr), value);
>    value = tree_cons (NULL_TREE, size_int (num_zr), value);
> [...]
>  
> -  clonei->vecsize_mangle = 'n';
> +  /* If we could not determine the WDS type from available parameters/return,
> +     then fallback to using uintptr_t.  */
> +  if (wds_elt_bits == 0)
> +    wds_elt_bits = POINTER_SIZE;
> +
>    clonei->mask_mode = VOIDmode;
>    poly_uint64 simdlen;
> -  auto_vec<poly_uint64> simdlens (2);
> +  typedef struct
> +    {
> +      poly_uint64 len;
> +      char mangle;
> +    } aarch64_clone_info;
> +  auto_vec<aarch64_clone_info> clones (3);

Might as well make this "auto_vec<aarch64_clone_info, 3> clones;".

> [...]
> +/* Helper function to adjust an SVE vector type of an SVE simd clone.  
> Returns
> +   an SVE vector type based on the element type of the vector TYPE, with 
> SIMDLEN
> +   number of elements.  If IS_MASK, returns an SVE mask type appropriate for 
> use
> +   with the SVE type it would otherwise return.  */
 
> +static tree
> +simd_clone_adjust_sve_vector_type (tree type, bool is_mask, poly_uint64 
> simdlen)
> +{
> +  unsigned int num_zr = 0;
> +  unsigned int num_pr = 0;
> +  machine_mode vector_mode;
> +  type = TREE_TYPE (type);
> +  scalar_mode scalar_m = SCALAR_TYPE_MODE (type);
> +  vector_mode = aarch64_sve_data_mode (scalar_m, simdlen).require ();
> +  type = build_vector_type_for_mode (type, vector_mode);
> +  if (is_mask)
> +    {
> +      type = truth_type_for (type);
> +      num_pr = 1;
> +    }
> +  else
> +    num_zr = 1;
> +
> +  /* We create new types here with the SVE type attribute instead of using 
> ACLE
> +     types as we need to support unpacked vectors which aren't available as
> +     ACLE SVE types.  */

One thing that worried me when seeing this again is that we'll create
anonymous attributes for things that do have an ACLE type.  The anonymous
and ACLE attributes will then compare unequal.  But that will only make
a difference once we support a means of defining the implementation in
C/C++.  It might be worth adding a ??? though:

  /* ??? This creates anonymous "SVE type" attributes for all types,
     even those that correspond to <arm_sve.h> types.  This affects type
     compatibility in C/C++, but not in gimple.  (Gimple type equivalence
     is instead decided by TARGET_COMPATIBLE_VECTOR_TYPES_P.)

     Thus a C/C++ definition of the implementation function will have a
     different function type from the declaration that this code creates.
     However, it doesn't seem worth trying to fix that until we have a
     way of handling implementations that operate on unpacked types.  */

> +  type = build_distinct_type_copy (type);
> +  aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
> +  return type;
> +}
> +
>+/* Implement TARGET_SIMD_CLONE_ADJUST.  */
>  static void
>  aarch64_simd_clone_adjust (struct cgraph_node *node)
>  {
> -  /* Add aarch64_vector_pcs target attribute to SIMD clones so they
> -     use the correct ABI.  */
> -
>    tree t = TREE_TYPE (node->decl);
> -  TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
> -                                     TYPE_ATTRIBUTES (t));
> +
> +  if (node->simdclone->vecsize_mangle == 's')
> +    {
> +      /* This is additive and has no effect if SVE, or a superset thereof, is
> +      already enabled.  */
> +      tree target = build_string (strlen ("+sve") + 1, "+sve");
> +      if (!aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 
> 0))
> +     gcc_unreachable ();
> +      push_function_decl (node->decl);
> +    }
> +  else
> +    {
> +     /* Add aarch64_vector_pcs target attribute to SIMD clones so they
> +        use the correct ABI.  */
> +     TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
> +                                           TYPE_ATTRIBUTES (t));

This block is indented 2 columns too far.

> +    }
> +  cgraph_simd_clone *sc = node->simdclone;
> +
> +  for (unsigned i = 0; i < sc->nargs; ++i)
> +    {
> +      bool is_mask = false;
> +      tree type;
> +      switch (sc->args[i].arg_type)
> +     {
> +     case SIMD_CLONE_ARG_TYPE_MASK:
> +       is_mask = true;
> +       gcc_fallthrough ();
> +     case SIMD_CLONE_ARG_TYPE_VECTOR:
> +     case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
> +     case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
> +       type = sc->args[i].vector_type;
> +       gcc_assert (VECTOR_TYPE_P (type));
> +       if (node->simdclone->vecsize_mangle == 's')
> +         type = simd_clone_adjust_sve_vector_type (type, is_mask,
> +                                                   sc->simdlen);
> +       else if (is_mask)
> +         type = truth_type_for (type);

Sorry, I have a horrible feeling I knew this once and have forgotten,
but: why do we need to this for non-SVE, when we didn't before?

> +       sc->args[i].vector_type = type;
> +       break;
> +     default:
> +       continue;
> +     }
> +    }
> +  if (node->simdclone->vecsize_mangle == 's')
> +    {
> +      tree ret_type = TREE_TYPE (t);
> +      if (VECTOR_TYPE_P (ret_type))
> +     TREE_TYPE (t)
> +       = simd_clone_adjust_sve_vector_type (ret_type, false,
> +                                            node->simdclone->simdlen);
> +      pop_function_decl ();
> +    }
>  }
>  
>  /* Implement TARGET_SIMD_CLONE_USABLE.  */
> [...]
> diff --git a/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C 
> b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..952b56dd87cc80ea7efadc63960157baac6abd63
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/aarch64/vect-simd-clone-1.C
> @@ -0,0 +1,88 @@
> +/* { dg-do compile }  */
> +/* { dg-additional-options "-O3 -march=armv8-a" } */
> +
> +/*  Ensure correct creation of SVE Vector-length agnostic (VLA SVE) vector
> +    function calls from scalar versions in accordance with the Vector 
> Function
> +    Application Binary Interface Specification for AArch64 (AAVPCS).
> +
> +  We check for correctness in:
> +    - Vector function name mangling, with the grammar:
> +
> +      vector name := prefix  "_" name
> +      prefix := "_ZGV" isa mask <len> <parameters>
> +
> +      Whereby:
> +      - <isa>  := "s" for SVE
> +      - <mask> := "M" for Mask
> +      - <len>  := "x" for VLA SVE
> +
> +      resulting in:
> +      <prefix> := "_ZGVsMx" <parameters>
> +
> +      with each vector parameter contributing a "v" to the prefix.
> +
> +    - Parameter and return value mapping:
> +      - Unless marked with uniform or linear OpenMP clauses, parameters and
> +      return values are expected to map to vectors.
> +      - Where the lane-size of a parameter is less than the widest data size
> +      for a given function, the resulting vector should be unpacked and
> +      populated via use extending loads.

s/use //

It can be populated in other ways too, though, so it might be worth
saying something more equivocal.

Same for the C test.

I should have noticed this last time, sorry, but we don't seem to have
any coverage for the linear cases above.  Maybe that comes in a later
patch though.

Thanks,
Richard

Reply via email to