gcc/ChangeLog:

	* config/aarch64/aarch64.opt (mveclibabi=): New option.
	* config/aarch64/aarch64-opts.h (enum aarch64_veclibabi): New.
	* config/aarch64/aarch64-protos.h
	(aarch64_builtin_vectorized_function): Declare.
	* config/aarch64/aarch64.cc (pure_scalable_type_info::analyze):
	Handle scalable vectors without "SVE type" attributes.
	(aarch64_selected_veclibabi): New variable.
	(aarch64_override_options_internal): Handle the -mveclibabi option.
	(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.
	* config/aarch64/aarch64-builtins.cc
	(aarch64_builtin_vectorized_function): New function.
	* doc/invoke.texi: Document -mveclibabi= for AArch64 targets.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/vect-vecabi-sleefgnu-neon.c: New test.
	* gcc.target/aarch64/vect-vecabi-sleefgnu-sve.c: New test.
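
As an illustration (the function and variable names below are just
examples), a loop like the following is expected to be vectorized into
a call to the GNU-ABI SLEEF entry point _ZGVnN4v_sinf for Advanced SIMD,
or _ZGVsNxv_sinf for SVE, when compiled with
-O3 -funsafe-math-optimizations -mveclibabi=sleefgnu:

    #include <math.h>

    void
    apply_sinf (float *restrict out, const float *restrict in, int n)
    {
      for (int i = 0; i < n; i++)
        out[i] = sinf (in[i]);
    }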
---
gcc/config/aarch64/aarch64-builtins.cc | 113 ++++++++++++++++++
gcc/config/aarch64/aarch64-opts.h | 5 +
gcc/config/aarch64/aarch64-protos.h | 3 +
gcc/config/aarch64/aarch64.cc | 66 ++++++++++
gcc/config/aarch64/aarch64.opt | 15 +++
gcc/doc/invoke.texi | 15 +++
.../aarch64/vect-vecabi-sleefgnu-neon.c | 16 +++
.../aarch64/vect-vecabi-sleefgnu-sve.c | 16 +++
8 files changed, 249 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-vecabi-sleefgnu-neon.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-vecabi-sleefgnu-sve.c
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index cc6b7c01fd1..f53fa91b8d0 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -47,6 +47,7 @@
#include "stringpool.h"
#include "attribs.h"
#include "gimple-fold.h"
+#include "builtins.h"
#define v8qi_UP E_V8QImode
#define v8di_UP E_V8DImode
@@ -3450,6 +3451,118 @@ aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
return NULL_TREE;
}
+/* The vector library ABI selected with -mveclibabi=, if any.  */
+extern enum aarch64_veclibabi aarch64_selected_veclibabi;
+
+/* Return a function declaration for a vectorized version of the combined
+ function with combined_fn code FN_CODE, result vector type TYPE_OUT and
+ argument vector type TYPE_IN. Return NULL_TREE if none is available.  */
+tree
+aarch64_builtin_vectorized_function (unsigned int fn_code,
+ tree type_out, tree type_in)
+{
+ if (TREE_CODE (type_out) != VECTOR_TYPE
+ || TREE_CODE (type_in) != VECTOR_TYPE
+ || aarch64_selected_veclibabi != aarch64_veclibabi_type_sleefgnu
+ || !flag_unsafe_math_optimizations)
+ return NULL_TREE;
+
+ machine_mode mode = TYPE_MODE (TREE_TYPE (type_out));
+ poly_uint64 n = TYPE_VECTOR_SUBPARTS (type_out);
+ if (mode != TYPE_MODE (TREE_TYPE (type_in))
+ || !known_eq (n, TYPE_VECTOR_SUBPARTS (type_in)))
+ return NULL_TREE;
+
+ bool is_scalable = !n.is_constant ();
+ if (is_scalable)
+ {
+ /* SVE is needed for scalable vectors; an SVE register's size is
+ always a multiple of 128 bits.  */
+ if (!TARGET_SVE
+ || (mode == DFmode && !known_eq (n, poly_uint64 (2, 2)))
+ || (mode == SFmode && !known_eq (n, poly_uint64 (4, 4))))
+ return NULL_TREE;
+ }
+ else
+ {
+ /* A NEON register can hold two doubles or four floats.  */
+ if (!TARGET_SIMD
+ || (mode == DFmode && n.to_constant () != 2)
+ || (mode == SFmode && n.to_constant () != 4))
+ return NULL_TREE;
+ }
+
+ tree fntype;
+ combined_fn fn = combined_fn (fn_code);
+ const char *argencoding;
+ switch (fn)
+ {
+ CASE_CFN_EXP:
+ CASE_CFN_LOG:
+ CASE_CFN_LOG10:
+ CASE_CFN_TANH:
+ CASE_CFN_TAN:
+ CASE_CFN_ATAN:
+ CASE_CFN_ATANH:
+ CASE_CFN_CBRT:
+ CASE_CFN_SINH:
+ CASE_CFN_SIN:
+ CASE_CFN_ASINH:
+ CASE_CFN_ASIN:
+ CASE_CFN_COSH:
+ CASE_CFN_COS:
+ CASE_CFN_ACOSH:
+ CASE_CFN_ACOS:
+ fntype = build_function_type_list (type_out, type_in, NULL);
+ argencoding = "v";
+ break;
+
+ CASE_CFN_POW:
+ CASE_CFN_ATAN2:
+ fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+ argencoding = "vv";
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ tree fndecl = mathfn_built_in (mode == DFmode
+ ? double_type_node : float_type_node, fn);
+ const char *scalar_name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
+ /* Builtins will always be prefixed with '__builtin_'. */
+ gcc_assert (strncmp (scalar_name, "__builtin_", 10) == 0);
+ scalar_name += 10;
+
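+ /* Build the AArch64 vector function ABI name
+ _ZGV<isa><mask><len><args>_<scalar-name>: <isa> is 'n' for Advanced
+ SIMD or 's' for SVE, <mask> 'N' marks an unmasked variant, <len> is
+ the lane count ('x' for scalable vectors) and each 'v' in <args>
+ denotes a vector argument. E.g. four-lane Advanced SIMD sinf becomes
+ _ZGVnN4v_sinf.  */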
+ char vectorized_name[32];
+ if (is_scalable)
+ {
+ /* SVE ISA: the lane count is scalable, hence 'x'.  */
+ int len = snprintf (vectorized_name, sizeof (vectorized_name),
+ "_ZGVsNx%s_%s", argencoding, scalar_name);
+ if (len < 0 || (size_t) len >= sizeof (vectorized_name))
+ return NULL_TREE;
+ }
+ else
+ {
+ /* Advanced SIMD (NEON) ISA: fixed lane count.  */
+ int len = snprintf (vectorized_name, sizeof (vectorized_name),
+ "_ZGVnN%d%s_%s", mode == SFmode ? 4 : 2,
+ argencoding, scalar_name);
+ if (len < 0 || (size_t) len >= sizeof (vectorized_name))
+ return NULL_TREE;
+ }
+
+ tree new_fndecl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+ get_identifier (vectorized_name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ TREE_READONLY (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+
+ return new_fndecl;
+}
+
#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef CF0
diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
index a9f3e2715ca..d12871b893c 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -98,4 +98,9 @@ enum aarch64_key_type {
AARCH64_KEY_B
};
+enum aarch64_veclibabi {
+ aarch64_veclibabi_type_none,
+ aarch64_veclibabi_type_sleefgnu
+};
+
#endif
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 63339fa47df..53c6e455da8 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1066,4 +1066,7 @@ extern bool aarch64_harden_sls_blr_p (void);
extern void aarch64_output_patchable_area (unsigned int, bool);
+extern tree aarch64_builtin_vectorized_function (unsigned int fn_code,
+ tree type_out, tree type_in);
+
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 42617ced73a..50ac37ff01e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -84,6 +84,7 @@
#include "aarch64-feature-deps.h"
#include "config/arm/aarch-common.h"
#include "config/arm/aarch-common-protos.h"
+#include "print-tree.h"
/* This file should be included last. */
#include "target-def.h"
@@ -2951,6 +2952,62 @@ pure_scalable_type_info::analyze (const_tree type)
return IS_PST;
}
+ /* Only functions and types that are part of the Arm C Language
+ Extensions (ACLE, arm_sve.h) carry the "SVE type" attributes.
+ The auto-vectorizer does not annotate the vector types it creates
+ with those attributes. With the support of vectorized libm function
+ builtins for SVE, scalable vectors without these attributes have to
+ be handled here as well.  */
+ if (TREE_CODE (type) == VECTOR_TYPE
+ && !TYPE_VECTOR_SUBPARTS (type).is_constant ())
+ {
+ /* Boolean vectors are special because they are used by
+ the vectorizer as masks that must go into the
+ predicate registers. */
+ if (TREE_CODE (TREE_TYPE (type)) == BOOLEAN_TYPE)
+ {
+ p.num_zr = 0;
+ p.num_pr = 1;
+ p.mode = p.orig_mode = TYPE_MODE (type);
+ add_piece (p);
+ return IS_PST;
+ }
+
+ static const struct {
+ machine_mode mode;
+ unsigned int element_size;
+ poly_uint64 vector_size;
+ } valid_vectors[] = {
+ { VNx8BFmode, 16, poly_uint64 (8, 8) }, /* svbfloat16_t */
+ { VNx8HFmode, 16, poly_uint64 (8, 8) }, /* svfloat16_t */
+ { VNx4SFmode, 32, poly_uint64 (4, 4) }, /* svfloat32_t */
+ { VNx2DFmode, 64, poly_uint64 (2, 2) }, /* svfloat64_t */
+ { VNx16QImode, 8, poly_uint64 (16, 16) }, /* sv[u]int8_t */
+ { VNx8HImode, 16, poly_uint64 (8, 8) }, /* sv[u]int16_t */
+ { VNx4SImode, 32, poly_uint64 (4, 4) }, /* sv[u]int32_t */
+ { VNx2DImode, 64, poly_uint64 (2, 2) }, /* sv[u]int64_t */
+ };
+
+ machine_mode elm_mode = TYPE_MODE (TREE_TYPE (type));
+ unsigned int elm_size = GET_MODE_BITSIZE (elm_mode).to_constant ();
+ for (unsigned i = 0; i < ARRAY_SIZE (valid_vectors); i++)
+ if (valid_vectors[i].element_size == elm_size
+ && valid_vectors[i].mode == TYPE_MODE (type)
+ && known_eq (valid_vectors[i].vector_size,
+ TYPE_VECTOR_SUBPARTS (type)))
+ {
+ p.num_zr = 1;
+ p.num_pr = 0;
+ p.mode = p.orig_mode = valid_vectors[i].mode;
+ add_piece (p);
+ return IS_PST;
+ }
+
+ fatal_error (input_location, "unsupported vector type %qT"
+ " as function parameter without SVE attributes", type);
+ }
+
/* Check for user-defined PSTs. */
if (TREE_CODE (type) == ARRAY_TYPE)
return analyze_array (type);
@@ -17851,6 +17908,8 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts)
flag_mrecip_low_precision_sqrt = true;
}
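+
+/* The vector library ABI selected with -mveclibabi=, if any.  */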
+enum aarch64_veclibabi aarch64_selected_veclibabi = aarch64_veclibabi_type_none;
+
/* 'Unpack' up the internal tuning structs and update the options
in OPTS. The caller must have set up selected_tune and selected_arch
as all the other target-specific codegen decisions are
@@ -18031,6 +18090,9 @@ aarch64_override_options_internal (struct gcc_options *opts)
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
opts->x_flag_prefetch_loop_arrays = 1;
+ if (opts->x_aarch64_veclibabi_type == aarch64_veclibabi_type_sleefgnu)
+ aarch64_selected_veclibabi = aarch64_veclibabi_type_sleefgnu;
+
aarch64_override_options_after_change_1 (opts);
}
@@ -28085,6 +28147,10 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_CONST_ANCHOR
#define TARGET_CONST_ANCHOR 0x1000000
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+ aarch64_builtin_vectorized_function
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 1d7967db9c0..76013dacdea 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -302,3 +302,18 @@ Constant memset size in bytes from which to start using MOPS sequence.
-param=aarch64-vect-unroll-limit=
Target Joined UInteger Var(aarch64_vect_unroll_limit) Init(4) Param
Limit how much the autovectorizer may unroll a loop.
+
+;; -mveclibabi=
+TargetVariable
+enum aarch64_veclibabi aarch64_veclibabi_type = aarch64_veclibabi_type_none
+
+mveclibabi=
+Target RejectNegative Joined Var(aarch64_veclibabi_type) Enum(aarch64_veclibabi) Init(aarch64_veclibabi_type_none)
+Vector library ABI to use.
+
+Enum
+Name(aarch64_veclibabi) Type(enum aarch64_veclibabi)
+Known vectorization library ABIs (for use with the -mveclibabi= option):
+
+EnumValue
+Enum(aarch64_veclibabi) String(sleefgnu) Value(aarch64_veclibabi_type_sleefgnu)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a38547f53e5..71fbbf27522 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -20383,6 +20383,21 @@ across releases.
This option is only intended to be useful when developing GCC.
+@opindex mveclibabi
+@item -mveclibabi=@var{type}
+Specifies the ABI type to use for vectorizing intrinsics using an
+external library. The only type supported at present is @samp{sleefgnu},
+which specifies the GNU ABI variant of the SLEEF Vectorized Math
+Library. This option applies to both Advanced SIMD (NEON) and SVE
+code generation.
+
+GCC currently emits vectorized calls to @code{exp}, @code{log}, @code{log10},
+@code{tanh}, @code{tan}, @code{atan}, @code{atanh}, @code{cbrt}, @code{sinh},
+@code{sin}, @code{asinh}, @code{asin}, @code{cosh}, @code{cos}, @code{acosh},
+@code{acos}, @code{pow} and @code{atan2} when possible and profitable
+on AArch64.
+
+Both @option{-ftree-vectorize} and @option{-funsafe-math-optimizations}
+must also be enabled, and @code{libsleefgnu} must be specified at link
+time.
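+
+For example, assuming the SLEEF GNU-ABI library is installed as
+@file{libsleefgnu} and @file{foo.c} contains vectorizable calls to the
+functions above, it could be compiled as:
+
+@smallexample
+gcc -O3 -funsafe-math-optimizations -mveclibabi=sleefgnu foo.c -lsleefgnu
+@end smallexample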
+
@opindex mverbose-cost-dump
@item -mverbose-cost-dump
Enable verbose cost model dumping in the debug dump files. This option is
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-vecabi-sleefgnu-neon.c b/gcc/testsuite/gcc.target/aarch64/vect-vecabi-sleefgnu-neon.c