Re: [PATCH v9 1/2] aarch64: Add AdvSIMD faminmax intrinsics

Richard Sandiford Wed, 18 Sep 2024 07:35:31 -0700

<saurabh....@arm.com> writes:
> The AArch64 FEAT_FAMINMAX extension is optional from Armv9.2-a and
> mandatory from Armv9.5-a. It introduces instructions for computing the
> element-wise floating-point absolute maximum and minimum of two vectors.
>
> This patch introduces AdvSIMD faminmax intrinsics. The intrinsics of
> this extension are implemented as the following builtin functions:
> * vamax_f16
> * vamaxq_f16
> * vamax_f32
> * vamaxq_f32
> * vamaxq_f64
> * vamin_f16
> * vaminq_f16
> * vamin_f32
> * vaminq_f32
> * vaminq_f64
>
> We are defining a new way to add AArch64 AdvSIMD intrinsics by listing
> all the intrinsics in a .def file and then using that .def file to
> initialise various data structures. This leads to more concise code
> and makes it easier to add new AdvSIMD intrinsics in the future.
>
> The faminmax intrinsics are defined using the new approach.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-builtins.cc
>       (ENTRY): Macro to parse the contents of
>       aarch64-simd-pragma-builtins.def.
>       (ENTRY_VHSDF): Macro to parse the contents of
>       aarch64-simd-pragma-builtins.def.
>       (enum aarch64_builtins): New enum values for faminmax builtins
>       via aarch64-simd-pragma-builtins.def.
>       (enum class aarch64_builtin_signatures): Enum class to specify
>       the number of operands a builtin will take.
>       (struct aarch64_pragma_builtins_data): Struct to hold data from
>       aarch64-simd-pragma-builtins.def.
>       (aarch64_fntype): New function to define function types of
>       intrinsics given an object of type aarch64_pragma_builtins_data.
>       (aarch64_init_pragma_builtins): New function to define pragma
>       builtins.
>       (aarch64_get_pragma_builtin): New function to get a row of
>       aarch64_pragma_builtins, given code.
>       (handle_arm_neon_h): Modify to call
>       aarch64_init_pragma_builtins.
>       (aarch64_general_check_builtin_call): Modify to check whether
>       required flag is being used for pragma builtins.
>       (aarch64_expand_pragma_builtin): New function to emit
>       instructions of pragma_builtin.
>       (aarch64_general_expand_builtin): Modify to call
>       aarch64_expand_pragma_builtin.
>       * config/aarch64/aarch64-option-extensions.def
>       (AARCH64_OPT_EXTENSION): Introduce new flag for this extension.
>       * config/aarch64/aarch64-simd.md
>       (@aarch64_<faminmax_uns_op><mode>): Instruction pattern for
>       faminmax intrinsics.
>       * config/aarch64/aarch64.h
>       (TARGET_FAMINMAX): Introduce new flag for this extension.
>       * config/aarch64/iterators.md: New iterators and unspecs.
>       * doc/invoke.texi: Document extension in AArch64 Options.
>       * config/aarch64/aarch64-simd-pragma-builtins.def: New file to
>       list pragma builtins.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/simd/faminmax-builtins-no-flag.c: New test.
>       * gcc.target/aarch64/simd/faminmax-builtins.c: New test.


LGTM.  OK, thanks.

It looks from MAINTAINERS that you don't currently have commit access.
Could you follow the procedure on https://gcc.gnu.org/gitwrite.html ,
listing me as sponsor?  Or if you already have an account for binutils,
I think the process is to email overseers directly.

Thanks for adding the new builtins infrastructure.

Richard

> ---
>  gcc/config/aarch64/aarch64-builtins.cc        | 119 ++++++++++++++++++
>  .../aarch64/aarch64-option-extensions.def     |   2 +
>  .../aarch64/aarch64-simd-pragma-builtins.def  |  23 ++++
>  gcc/config/aarch64/aarch64-simd.md            |  10 ++
>  gcc/config/aarch64/aarch64.h                  |   4 +
>  gcc/config/aarch64/iterators.md               |   9 ++
>  gcc/doc/invoke.texi                           |   2 +
>  .../aarch64/simd/faminmax-builtins-no-flag.c  |  10 ++
>  .../aarch64/simd/faminmax-builtins.c          | 115 +++++++++++++++++
>  9 files changed, 294 insertions(+)
>  create mode 100644 gcc/config/aarch64/aarch64-simd-pragma-builtins.def
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> index eb878b933fe..6266bea3b39 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -757,6 +757,18 @@ typedef struct
>  #define VAR1(T, N, MAP, FLAG, A) \
>    AARCH64_SIMD_BUILTIN_##T##_##N##A,
>  
> +#undef ENTRY
> +#define ENTRY(N, S, M, U, F) \
> +  AARCH64_##N,
> +
> +#undef ENTRY_VHSDF
> +#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC, EXTENSIONS) \
> +  AARCH64_##NAME##_f16, \
> +  AARCH64_##NAME##q_f16, \
> +  AARCH64_##NAME##_f32, \
> +  AARCH64_##NAME##q_f32, \
> +  AARCH64_##NAME##q_f64,
> +
>  enum aarch64_builtins
>  {
>    AARCH64_BUILTIN_MIN,
> @@ -829,6 +841,10 @@ enum aarch64_builtins
>    AARCH64_RBIT,
>    AARCH64_RBITL,
>    AARCH64_RBITLL,
> +  /* Pragma builtins.  */
> +  AARCH64_PRAGMA_BUILTIN_START,
> +#include "aarch64-simd-pragma-builtins.def"
> +  AARCH64_PRAGMA_BUILTIN_END,
>    /* System register builtins.  */
>    AARCH64_RSR,
>    AARCH64_RSRP,
> @@ -947,6 +963,7 @@ const char *aarch64_scalar_builtin_types[] = {
>  
>  extern GTY(()) aarch64_simd_type_info aarch64_simd_types[];
>  
> +#undef ENTRY
>  #define ENTRY(E, M, Q, G)  \
>    {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
>  struct aarch64_simd_type_info aarch64_simd_types [] = {
> @@ -1547,6 +1564,78 @@ aarch64_init_simd_builtin_functions (bool 
> called_from_pragma)
>      }
>  }
>  
> +enum class aarch64_builtin_signatures
> +{
> +  binary,
> +};
> +
> +#undef ENTRY
> +#define ENTRY(N, S, M, U, F) \
> +  {#N, aarch64_builtin_signatures::S, E_##M##mode, U, F},
> +
> +#undef ENTRY_VHSDF
> +#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC, EXTENSIONS) \
> +  ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC, EXTENSIONS) \
> +  ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC, EXTENSIONS) \
> +  ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC, EXTENSIONS) \
> +  ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC, EXTENSIONS) \
> +  ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC, EXTENSIONS)
> +
> +/* Initialize pragma builtins.  */
> +
> +struct aarch64_pragma_builtins_data
> +{
> +  const char *name;
> +  aarch64_builtin_signatures signature;
> +  machine_mode mode;
> +  int unspec;
> +  aarch64_feature_flags required_extensions;
> +};
> +
> +static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
> +#include "aarch64-simd-pragma-builtins.def"
> +};
> +
> +static tree
> +aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
> +{
> +  auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none);
> +  switch (builtin_data.signature)
> +    {
> +    case aarch64_builtin_signatures::binary:
> +      return build_function_type_list (type, type, type, NULL_TREE);
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
> +static void
> +aarch64_init_pragma_builtins ()
> +{
> +  for (size_t i = 0; i < ARRAY_SIZE (aarch64_pragma_builtins); ++i)
> +    {
> +      auto data = aarch64_pragma_builtins[i];
> +      auto fntype = aarch64_fntype (data);
> +      auto code = AARCH64_PRAGMA_BUILTIN_START + i + 1;
> +      aarch64_builtin_decls[code]
> +     = aarch64_general_simulate_builtin (data.name, fntype, code);
> +    }
> +}
> +
> +/* If the builtin function with code CODE has an entry in
> +   aarch64_pragma_builtins, return its entry, otherwise return null.  */
> +
> +static const aarch64_pragma_builtins_data*
> +aarch64_get_pragma_builtin (int code)
> +{
> +  if (!(code > AARCH64_PRAGMA_BUILTIN_START
> +     && code < AARCH64_PRAGMA_BUILTIN_END))
> +    return NULL;
> +
> +  auto idx = code - (AARCH64_PRAGMA_BUILTIN_START + 1);
> +  return &aarch64_pragma_builtins[idx];
> +}
> +
>  /* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
>     indexed by TYPE_INDEX.  */
>  static void
> @@ -1640,6 +1729,7 @@ handle_arm_neon_h (void)
>  
>    aarch64_init_simd_builtin_functions (true);
>    aarch64_init_simd_intrinsics ();
> +  aarch64_init_pragma_builtins ();
>  }
>  
>  static void
> @@ -2326,6 +2416,12 @@ aarch64_general_check_builtin_call (location_t 
> location, vec<location_t>,
>      return aarch64_check_required_extensions (location, decl,
>                                             AARCH64_FL_MEMTAG);
>  
> +  if (auto builtin_data = aarch64_get_pragma_builtin (code))
> +    {
> +      auto flags = builtin_data->required_extensions;
> +      return aarch64_check_required_extensions (location, decl, flags);
> +    }
> +
>    return true;
>  }
>  
> @@ -3189,6 +3285,25 @@ aarch64_expand_builtin_data_intrinsic (unsigned int 
> fcode, tree exp, rtx target)
>    return ops[0].value;
>  }
>  
> +static rtx
> +aarch64_expand_pragma_builtin (tree exp, rtx target,
> +                            const aarch64_pragma_builtins_data *builtin_data)
> +{
> +  expand_operand ops[3];
> +  auto mode = builtin_data->mode;
> +  auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
> +  auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
> +  create_output_operand (&ops[0], target, mode);
> +  create_input_operand (&ops[1], op1, mode);
> +  create_input_operand (&ops[2], op2, mode);
> +
> +  auto unspec = builtin_data->unspec;
> +  auto icode = code_for_aarch64 (unspec, mode);
> +  expand_insn (icode, 3, ops);
> +
> +  return target;
> +}
> +
>  /* Expand an expression EXP as fpsr or fpcr setter (depending on
>     UNSPEC) using MODE.  */
>  static void
> @@ -3369,6 +3484,9 @@ aarch64_general_expand_builtin (unsigned int fcode, 
> tree exp, rtx target,
>        && fcode <= AARCH64_RBITLL)
>      return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);
>  
> +  if (auto builtin_data = aarch64_get_pragma_builtin (fcode))
> +    return aarch64_expand_pragma_builtin (exp, target, builtin_data);
> +
>    gcc_unreachable ();
>  }
>  
> @@ -4021,6 +4139,7 @@ aarch64_resolve_overloaded_builtin_general (location_t 
> loc, tree function,
>  #undef CF3
>  #undef CF4
>  #undef CF10
> +#undef ENTRY_VHSDF
>  #undef VAR1
>  #undef VAR2
>  #undef VAR3
> diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
> index 6998627f377..8279f5a76ea 100644
> --- a/gcc/config/aarch64/aarch64-option-extensions.def
> +++ b/gcc/config/aarch64/aarch64-option-extensions.def
> @@ -234,6 +234,8 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")
>  
>  AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8")
>  
> +AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
> +
>  #undef AARCH64_OPT_FMV_EXTENSION
>  #undef AARCH64_OPT_EXTENSION
>  #undef AARCH64_FMV_FEATURE
> diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
> new file mode 100644
> index 00000000000..f432185be46
> --- /dev/null
> +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
> @@ -0,0 +1,23 @@
> +/* AArch64 SIMD pragma builtins
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   Contributed by ARM Ltd.
> +
> +   This file is part of GCC.
> +
> +   GCC is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   GCC is distributed in the hope that it will be useful, but
> +   WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with GCC; see the file COPYING3.  If not see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +// faminmax
> +ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX, AARCH64_FL_FAMINMAX)
> +ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN, AARCH64_FL_FAMINMAX)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 23c03a96371..67f0fe26f93 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -9910,3 +9910,13 @@
>    "shl\\t%d0, %d1, #16"
>    [(set_attr "type" "neon_shift_imm")]
>  )
> +
> +;; faminmax
> +(define_insn "@aarch64_<faminmax_uns_op><mode>"
> +  [(set (match_operand:VHSDF 0 "register_operand" "=w")
> +     (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
> +                    (match_operand:VHSDF 2 "register_operand" "w")]
> +                   FAMINMAX_UNS))]
> +  "TARGET_FAMINMAX"
> +  "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
> +)
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 2dfb999bea5..e754c4ea748 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -457,6 +457,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE 
> ATTRIBUTE_UNUSED
>      enabled through +gcs.  */
>  #define TARGET_GCS AARCH64_HAVE_ISA (GCS)
>  
> +/* Floating Point Absolute Maximum/Minimum extension instructions are
> +   enabled through +faminmax.  */
> +#define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX)
> +
>  /* Prefer different predicate registers for the output of a predicated
>     operation over re-using an existing input predicate.  */
>  #define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 20a318e023b..17ac5e073aa 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1057,6 +1057,8 @@
>      UNSPEC_BFCVTN2     ; Used in aarch64-simd.md.
>      UNSPEC_BFCVT       ; Used in aarch64-simd.md.
>      UNSPEC_FCVTXN    ; Used in aarch64-simd.md.
> +    UNSPEC_FAMAX       ; Used in aarch64-simd.md.
> +    UNSPEC_FAMIN       ; Used in aarch64-simd.md.
>  
>      ;; All used in aarch64-sve2.md
>      UNSPEC_FCVTN
> @@ -4463,3 +4465,10 @@
>     (UNSPECV_SET_FPCR "fpcr")])
>  
>  (define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")])
> +
> +;; Iterators and attributes for faminmax
> +
> +(define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN])
> +
> +(define_int_attr faminmax_uns_op
> +  [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 511b8c8d311..e005d9bbfb0 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -21865,6 +21865,8 @@ Enable support for Armv8.9-a/9.4-a translation 
> hardening extension.
>  Enable the RCpc3 (Release Consistency) extension.
>  @item fp8
>  Enable the fp8 (8-bit floating point) extension.
> +@item faminmax
> +Enable the Floating Point Absolute Maximum/Minimum extension.
>  
>  @end table
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c
> new file mode 100644
> index 00000000000..63ed1508c23
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins-no-flag.c
> @@ -0,0 +1,10 @@
> +/* { dg-do assemble} */
> +/* { dg-additional-options "-march=armv9-a" } */
> +
> +#include "arm_neon.h"
> +
> +void
> +test (float32x4_t a, float32x4_t b)
> +{
> +  vamaxq_f32 (a, b); /* { dg-error {ACLE function 'vamaxq_f32' requires ISA extension 'faminmax'} } */
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c
> new file mode 100644
> index 00000000000..7e4f3eba81a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/faminmax-builtins.c
> @@ -0,0 +1,115 @@
> +/* { dg-do assemble} */
> +/* { dg-additional-options "-O3 -march=armv9-a+faminmax" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "arm_neon.h"
> +
> +/*
> +** test_vamax_f16:
> +**   famax   v0.4h, v0.4h, v1.4h
> +**   ret
> +*/
> +float16x4_t
> +test_vamax_f16 (float16x4_t a, float16x4_t b)
> +{
> +  return vamax_f16 (a, b);
> +}
> +
> +/*
> +** test_vamaxq_f16:
> +**   famax   v0.8h, v0.8h, v1.8h
> +**   ret
> +*/
> +float16x8_t
> +test_vamaxq_f16 (float16x8_t a, float16x8_t b)
> +{
> +  return vamaxq_f16 (a, b);
> +}
> +
> +/*
> +** test_vamax_f32:
> +**   famax   v0.2s, v0.2s, v1.2s
> +**   ret
> +*/
> +float32x2_t
> +test_vamax_f32 (float32x2_t a, float32x2_t b)
> +{
> +  return vamax_f32 (a, b);
> +}
> +
> +/*
> +** test_vamaxq_f32:
> +**   famax   v0.4s, v0.4s, v1.4s
> +**   ret
> +*/
> +float32x4_t
> +test_vamaxq_f32 (float32x4_t a, float32x4_t b)
> +{
> +  return vamaxq_f32 (a, b);
> +}
> +
> +/*
> +** test_vamaxq_f64:
> +**   famax   v0.2d, v0.2d, v1.2d
> +**   ret
> +*/
> +float64x2_t
> +test_vamaxq_f64 (float64x2_t a, float64x2_t b)
> +{
> +  return vamaxq_f64 (a, b);
> +}
> +
> +/*
> +** test_vamin_f16:
> +**   famin   v0.4h, v0.4h, v1.4h
> +**   ret
> +*/
> +float16x4_t
> +test_vamin_f16 (float16x4_t a, float16x4_t b)
> +{
> +  return vamin_f16 (a, b);
> +}
> +
> +/*
> +** test_vaminq_f16:
> +**   famin   v0.8h, v0.8h, v1.8h
> +**   ret
> +*/
> +float16x8_t
> +test_vaminq_f16 (float16x8_t a, float16x8_t b)
> +{
> +  return vaminq_f16 (a, b);
> +}
> +
> +/*
> +** test_vamin_f32:
> +**   famin   v0.2s, v0.2s, v1.2s
> +**   ret
> +*/
> +float32x2_t
> +test_vamin_f32 (float32x2_t a, float32x2_t b)
> +{
> +  return vamin_f32 (a, b);
> +}
> +
> +/*
> +** test_vaminq_f32:
> +**   famin   v0.4s, v0.4s, v1.4s
> +**   ret
> +*/
> +float32x4_t
> +test_vaminq_f32 (float32x4_t a, float32x4_t b)
> +{
> +  return vaminq_f32 (a, b);
> +}
> +
> +/*
> +** test_vaminq_f64:
> +**   famin   v0.2d, v0.2d, v1.2d
> +**   ret
> +*/
> +float64x2_t
> +test_vaminq_f64 (float64x2_t a, float64x2_t b)
> +{
> +  return vaminq_f64 (a, b);
> +}

Re: [PATCH v9 1/2] aarch64: Add AdvSIMD faminmax intrinsics

Reply via email to