> -----Original Message-----
> From: Christophe Lyon <christophe.l...@linaro.org>
> Sent: Thursday, November 16, 2023 3:26 PM
> To: gcc-patches@gcc.gnu.org; Richard Sandiford
> <richard.sandif...@arm.com>; Richard Earnshaw
> <richard.earns...@arm.com>; Kyrylo Tkachov <kyrylo.tkac...@arm.com>
> Cc: Christophe Lyon <christophe.l...@linaro.org>
> Subject: [PATCH 3/6] arm: [MVE intrinsics] Add support for contiguous loads
> and stores
> 
> This patch adds base support for load/store intrinsics to the
> framework, starting with loads and stores of contiguous memory
> elements, without extension or truncation.
> 
> Compared to the aarch64/SVE implementation, there's no support for
> gather/scatter loads/stores yet.  This will be added later as needed.
> 
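For context, the user-facing intrinsics this framework is being prepared
for are the arm_mve.h contiguous load/store families.  A minimal sketch of
the kind of code that should eventually resolve through it (the actual
vld1q/vst1q wiring lands later in the series):

    #include <arm_mve.h>

    void
    copy_vec (int32_t *dst, const int32_t *src)
    {
      int32x4_t v = vld1q (src);  /* contiguous load, no extension */
      vst1q (dst, v);             /* contiguous store, no truncation */
    }
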

Ok.
Thanks,
Kyrill

> 2023-11-16  Christophe Lyon  <christophe.l...@linaro.org>
> 
>       gcc/
>       * config/arm/arm-mve-builtins-functions.h (multi_vector_function)
>       (full_width_access): New classes.
>       * config/arm/arm-mve-builtins.cc
>       (find_type_suffix_for_scalar_type, infer_pointer_type)
>       (require_pointer_type, get_contiguous_base, add_mem_operand)
>       (add_fixed_operand, use_contiguous_load_insn)
>       (use_contiguous_store_insn): New.
>       * config/arm/arm-mve-builtins.h (memory_vector_mode)
>       (infer_pointer_type, require_pointer_type, get_contiguous_base)
>       (add_mem_operand)
>       (add_fixed_operand, use_contiguous_load_insn)
>       (use_contiguous_store_insn): New.
> ---
>  gcc/config/arm/arm-mve-builtins-functions.h |  56 ++++++++++
>  gcc/config/arm/arm-mve-builtins.cc          | 116 ++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins.h           |  28 ++++-
>  3 files changed, 199 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
> index eba1f071af0..6d234a2dd7c 100644
> --- a/gcc/config/arm/arm-mve-builtins-functions.h
> +++ b/gcc/config/arm/arm-mve-builtins-functions.h
> @@ -966,6 +966,62 @@ public:
>    }
>  };
> 
> +/* A function_base that sometimes or always operates on tuples of
> +   vectors.  */
> +class multi_vector_function : public function_base
> +{
> +public:
> +  CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple)
> +    : m_vectors_per_tuple (vectors_per_tuple) {}
> +
> +  unsigned int
> +  vectors_per_tuple () const override
> +  {
> +    return m_vectors_per_tuple;
> +  }
> +
> +  /* The number of vectors in a tuple, or 1 if the function only operates
> +     on single vectors.  */
> +  unsigned int m_vectors_per_tuple;
> +};
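
Just to illustrate the intent here: a function_base for a tuple-loading
intrinsic would pass the tuple size through this constructor.  A
hypothetical (not part of this patch) two-vector variant might look like:

    class vld2_like : public multi_vector_function
    {
    public:
      CONSTEXPR vld2_like () : multi_vector_function (2) {}
      /* memory_scalar_type, expand, etc. would follow.  */
    };
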
> +
> +/* A function_base that loads or stores contiguous memory elements
> +   without extending or truncating them.  */
> +class full_width_access : public multi_vector_function
> +{
> +public:
> +  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
> +    : multi_vector_function (vectors_per_tuple) {}
> +
> +  tree
> +  memory_scalar_type (const function_instance &fi) const override
> +  {
> +    return fi.scalar_type (0);
> +  }
> +
> +  machine_mode
> +  memory_vector_mode (const function_instance &fi) const override
> +  {
> +    machine_mode mode = fi.vector_mode (0);
> +    /* Vectors of floating-point elements are handled in memory as
> +       vectors of integers.  */
> +    switch (mode)
> +      {
> +      case E_V4SFmode:
> +        mode = E_V4SImode;
> +        break;
> +      case E_V8HFmode:
> +        mode = E_V8HImode;
> +        break;
> +      default:
> +        break;
> +      }
> +
> +    if (m_vectors_per_tuple != 1)
> +      mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
> +
> +    return mode;
> +  }
> +};
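
Concretely, given the mapping above, the single-vector results are:

    type suffix    vector_mode (0)    memory_vector_mode ()
    s32 / u32      V4SI               V4SI
    f32            V4SF               V4SI
    f16            V8HF               V8HI

and for m_vectors_per_tuple != 1 the mode is the corresponding array mode
obtained from targetm.array_mode.
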
> +
>  } /* end namespace arm_mve */
> 
>  /* Declare the global function base NAME, creating it from an instance
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
> index 02dc8fa9b73..a265cb05553 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -36,6 +36,7 @@
>  #include "fold-const.h"
>  #include "gimple.h"
>  #include "gimple-iterator.h"
> +#include "explow.h"
>  #include "emit-rtl.h"
>  #include "langhooks.h"
>  #include "stringpool.h"
> @@ -529,6 +530,22 @@ matches_type_p (const_tree model_type, const_tree candidate)
>          && TYPE_MAIN_VARIANT (model_type) == TYPE_MAIN_VARIANT (candidate));
>  }
> 
> +/* If TYPE is a valid MVE element type, return the corresponding type
> +   suffix, otherwise return NUM_TYPE_SUFFIXES.  */
> +static type_suffix_index
> +find_type_suffix_for_scalar_type (const_tree type)
> +{
> +  /* A linear search should be OK here, since the code isn't hot and
> +     the number of types is only small.  */
> +  for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
> +    {
> +      vector_type_index vector_i = type_suffixes[suffix_i].vector_type;
> +      if (matches_type_p (scalar_types[vector_i], type))
> +        return type_suffix_index (suffix_i);
> +    }
> +  return NUM_TYPE_SUFFIXES;
> +}
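
For example (suffix enumerator names as generated for the type_suffixes
table, following the SVE scheme):

    find_type_suffix_for_scalar_type (int32_type_node);  /* TYPE_SUFFIX_s32 */
    find_type_suffix_for_scalar_type (double_type_node); /* NUM_TYPE_SUFFIXES */
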
> +
>  /* Report an error against LOCATION that the user has tried to use
>     a floating point function when the mve.fp extension is disabled.  */
>  static void
> @@ -1125,6 +1142,37 @@ function_resolver::resolve_to (mode_suffix_index mode,
>    return res;
>  }
> 
> +/* Require argument ARGNO to be a pointer to a scalar type that has a
> +   corresponding type suffix.  Return that type suffix on success,
> +   otherwise report an error and return NUM_TYPE_SUFFIXES.  */
> +type_suffix_index
> +function_resolver::infer_pointer_type (unsigned int argno)
> +{
> +  tree actual = get_argument_type (argno);
> +  if (actual == error_mark_node)
> +    return NUM_TYPE_SUFFIXES;
> +
> +  if (TREE_CODE (actual) != POINTER_TYPE)
> +    {
> +      error_at (location, "passing %qT to argument %d of %qE, which"
> +             " expects a pointer type", actual, argno + 1, fndecl);
> +      return NUM_TYPE_SUFFIXES;
> +    }
> +
> +  tree target = TREE_TYPE (actual);
> +  type_suffix_index type = find_type_suffix_for_scalar_type (target);
> +  if (type == NUM_TYPE_SUFFIXES)
> +    {
> +      error_at (location, "passing %qT to argument %d of %qE, but %qT is not"
> +             " a valid MVE element type", actual, argno + 1, fndecl,
> +             build_qualified_type (target, 0));
> +      return NUM_TYPE_SUFFIXES;
> +    }
> +
> +  return type;
> +}
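
This is what lets the polymorphic forms resolve on the pointer argument
alone.  Expected behaviour, sketched with vld1q (which itself is only
added later in the series):

    int32_t *p;
    double *q;
    vld1q (p);  /* s32 suffix inferred -> resolves to vld1q_s32 */
    vld1q (q);  /* error: double is not a valid MVE element type */
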
> +
>  /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
>     vectors; NUM_VECTORS is 1 for the former.  Return the associated type
>     suffix on success, using TYPE_SUFFIX_b for predicates.  Report an error
> @@ -1498,6 +1546,22 @@ function_resolver::require_scalar_type (unsigned int argno,
>    return true;
>  }
> 
> +/* Require argument ARGNO to be some form of pointer, without being specific
> +   about its target type.  Return true if the argument has the right form,
> +   otherwise report an appropriate error.  */
> +bool
> +function_resolver::require_pointer_type (unsigned int argno)
> +{
> +  if (!scalar_argument_p (argno))
> +    {
> +      error_at (location, "passing %qT to argument %d of %qE, which"
> +             " expects a scalar pointer", get_argument_type (argno),
> +             argno + 1, fndecl);
> +      return false;
> +    }
> +  return true;
> +}
> +
>  /* Require the function to have exactly EXPECTED arguments.  Return true
>     if it does, otherwise report an appropriate error.  */
>  bool
> @@ -1955,6 +2019,14 @@ function_expander::direct_optab_handler (optab op, unsigned int suffix_i)
>    return ::direct_optab_handler (op, vector_mode (suffix_i));
>  }
> 
> +/* Return the base address for a contiguous load or store
> +   function.  */
> +rtx
> +function_expander::get_contiguous_base ()
> +{
> +  return args[0];
> +}
> +
>  /* For a function that does the equivalent of:
> 
>       OUTPUT = COND ? FN (INPUTS) : FALLBACK;
> @@ -2043,6 +2115,26 @@ function_expander::add_integer_operand (HOST_WIDE_INT x)
>    create_integer_operand (&m_ops.last (), x);
>  }
> 
> +/* Add a memory operand with mode MODE and address ADDR.  */
> +void
> +function_expander::add_mem_operand (machine_mode mode, rtx addr)
> +{
> +  gcc_assert (VECTOR_MODE_P (mode));
> +  rtx mem = gen_rtx_MEM (mode, memory_address (mode, addr));
> +  /* The memory is only guaranteed to be element-aligned.  */
> +  set_mem_align (mem, GET_MODE_ALIGNMENT (GET_MODE_INNER (mode)));
> +  add_fixed_operand (mem);
> +}
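
Worth noting: set_mem_align deliberately records only element alignment.
For mode == E_V4SImode the code above is equivalent to:

    rtx mem = gen_rtx_MEM (E_V4SImode, memory_address (E_V4SImode, addr));
    set_mem_align (mem, 32);  /* GET_MODE_ALIGNMENT (SImode) == 32 bits */

which matches the element-aligned (rather than vector-aligned) guarantee
of VLDRW.32/VSTRW.32.
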
> +
> +/* Add an operand that must be X.  The only way of legitimizing an
> +   invalid X is to reload the address of a MEM.  */
> +void
> +function_expander::add_fixed_operand (rtx x)
> +{
> +  m_ops.safe_grow (m_ops.length () + 1, true);
> +  create_fixed_operand (&m_ops.last (), x);
> +}
> +
>  /* Generate instruction ICODE, given that its operands have already
>     been added to M_OPS.  Return the value of the first operand.  */
>  rtx
> @@ -2137,6 +2229,30 @@ function_expander::use_cond_insn (insn_code icode, unsigned int merge_argno)
>    return generate_insn (icode);
>  }
> 
> +/* Implement the call using instruction ICODE, which loads memory operand 1
> +   into register operand 0.  */
> +rtx
> +function_expander::use_contiguous_load_insn (insn_code icode)
> +{
> +  machine_mode mem_mode = memory_vector_mode ();
> +
> +  add_output_operand (icode);
> +  add_mem_operand (mem_mode, get_contiguous_base ());
> +  return generate_insn (icode);
> +}
> +
> +/* Implement the call using instruction ICODE, which stores register operand 1
> +   into memory operand 0.  */
> +rtx
> +function_expander::use_contiguous_store_insn (insn_code icode)
> +{
> +  machine_mode mem_mode = memory_vector_mode ();
> +
> +  add_mem_operand (mem_mode, get_contiguous_base ());
> +  add_input_operand (icode, args[1]);
> +  return generate_insn (icode);
> +}
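
For a sense of how these two helpers will be used, a rough sketch of a
load function_base built on top of them (the insn-code getter name is
hypothetical; the real vld1q/vst1q bases and patterns come later in the
series):

    class vld1_like : public full_width_access
    {
    public:
      rtx
      expand (function_expander &e) const override
      {
        /* Hypothetical lookup of the contiguous-load pattern for the
           inferred vector mode.  */
        insn_code icode = code_for_mve_contiguous_load (e.vector_mode (0));
        return e.use_contiguous_load_insn (icode);
      }
    };
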
> +
>  /* Implement the call using a normal unpredicated optab for PRED_none.
> 
>     <optab> corresponds to:
> diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h
> index 4fd230fe4c7..9c219fa8db4 100644
> --- a/gcc/config/arm/arm-mve-builtins.h
> +++ b/gcc/config/arm/arm-mve-builtins.h
> @@ -278,6 +278,7 @@ public:
> 
>    unsigned int vectors_per_tuple () const;
>    tree memory_scalar_type () const;
> +  machine_mode memory_vector_mode () const;
> 
>    const mode_suffix_info &mode_suffix () const;
> 
> @@ -383,6 +384,7 @@ public:
>                  type_suffix_index = NUM_TYPE_SUFFIXES,
>                  type_suffix_index = NUM_TYPE_SUFFIXES);
> 
> +  type_suffix_index infer_pointer_type (unsigned int);
>    type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int);
>    type_suffix_index infer_vector_type (unsigned int);
> 
> @@ -394,8 +396,9 @@ public:
>                                   type_suffix_index,
>                                   type_class_index = SAME_TYPE_CLASS,
>                                   unsigned int = SAME_SIZE);
> -  bool require_integer_immediate (unsigned int);
>    bool require_scalar_type (unsigned int, const char *);
> +  bool require_pointer_type (unsigned int);
> +  bool require_integer_immediate (unsigned int);
>    bool require_derived_scalar_type (unsigned int, type_class_index,
>                                   unsigned int = SAME_SIZE);
> 
> @@ -476,18 +479,23 @@ public:
> 
>    insn_code direct_optab_handler (optab, unsigned int = 0);
> 
> +  rtx get_contiguous_base ();
>    rtx get_fallback_value (machine_mode, unsigned int, unsigned int &);
>    rtx get_reg_target ();
> 
>    void add_output_operand (insn_code);
>    void add_input_operand (insn_code, rtx);
>    void add_integer_operand (HOST_WIDE_INT);
> +  void add_mem_operand (machine_mode, rtx);
> +  void add_fixed_operand (rtx);
>    rtx generate_insn (insn_code);
> 
>    rtx use_exact_insn (insn_code);
>    rtx use_unpred_insn (insn_code);
>    rtx use_pred_x_insn (insn_code);
>    rtx use_cond_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO);
> +  rtx use_contiguous_load_insn (insn_code);
> +  rtx use_contiguous_store_insn (insn_code);
> 
>    rtx map_to_rtx_codes (rtx_code, rtx_code, rtx_code);
> 
> @@ -528,6 +536,15 @@ public:
>      gcc_unreachable ();
>    }
> 
> +  /* If the function addresses memory, return a vector mode whose
> +     GET_MODE_NUNITS is the number of elements addressed and whose
> +     GET_MODE_INNER is the mode of a single scalar memory element.  */
> +  virtual machine_mode
> +  memory_vector_mode (const function_instance &) const
> +  {
> +    gcc_unreachable ();
> +  }
> +
>    /* Try to fold the given gimple call.  Return the new gimple statement
>       on success, otherwise return null.  */
>    virtual gimple *fold (gimple_folder &) const { return NULL; }
> @@ -661,6 +678,15 @@ function_instance::memory_scalar_type () const
>    return base->memory_scalar_type (*this);
>  }
> 
> +/* If the function addresses memory, return a vector mode whose
> +   GET_MODE_NUNITS is the number of elements addressed and whose
> +   GET_MODE_INNER is the mode of a single scalar memory element.  */
> +inline machine_mode
> +function_instance::memory_vector_mode () const
> +{
> +  return base->memory_vector_mode (*this);
> +}
> +
>  /* Return information about the function's mode suffix.  */
>  inline const mode_suffix_info &
>  function_instance::mode_suffix () const
> --
> 2.34.1
