https://gcc.gnu.org/g:49365994da6c46829ce85839042ae8e409343188
commit r15-5786-g49365994da6c46829ce85839042ae8e409343188 Author: Claudio Bantaloukas <claudio.bantalou...@arm.com> Date: Fri Nov 29 12:52:43 2024 +0000 aarch64: specify fpm mode in function instances and groups Some intrinsics require setting the fpm register before calling the specific asm opcode required. In order to simplify review, this patch: - adds the fpm_mode_index attribute to function_group_info and function_instance objects. - updates existing initialisations and call sites. - updates equality and hash operations. gcc/ * config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl): Specify FPM_unused when folding. (svmul_impl): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.cc (build_one): Use the group fpm_mode when creating function instances. * config/aarch64/aarch64-sve-builtins-sve2.cc (svaba_impl, svqrshl_impl, svqshl_impl, svrshl_impl, svsra_impl): Specify FPM_unused when folding. * config/aarch64/aarch64-sve-builtins.cc (function_groups): Set fpm_mode on all elements. (neon_sve_function_groups, sme_function_groups): Likewise. (function_instance::hash): Include fpm_mode in hash. (function_builder::add_overloaded_functions): Use the group fpm_mode. (function_resolver::lookup_form): Use the function instance fpm_mode when looking up a function. * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_FUNCTION_GS_FPM): Add define. (DEF_SVE_FUNCTION_GS): Redefine against DEF_SVE_FUNCTION_GS_FPM. * config/aarch64/aarch64-sve-builtins.h (fpm_mode_index): New. (function_group_info): Add fpm_mode. (function_instance): Likewise. (function_instance::operator==): Handle fpm_mode. 
Diff: --- gcc/config/aarch64/aarch64-sve-builtins-base.cc | 21 ++++++++++-------- gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 4 ++-- gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 27 ++++++++++++++--------- gcc/config/aarch64/aarch64-sve-builtins.cc | 21 +++++++++++------- gcc/config/aarch64/aarch64-sve-builtins.def | 8 ++++++- gcc/config/aarch64/aarch64-sve-builtins.h | 27 ++++++++++++++++++----- 6 files changed, 71 insertions(+), 37 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 87e9909b55a0..95e66dc2adf9 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -775,9 +775,9 @@ public: tree pg = gimple_call_arg (f.call, 0); if (!f.type_suffix (0).unsigned_p && integer_minus_onep (op2)) { - function_instance instance ("svneg", functions::svneg, - shapes::unary, MODE_none, - f.type_suffix_ids, GROUP_none, f.pred); + function_instance instance ("svneg", functions::svneg, shapes::unary, + MODE_none, f.type_suffix_ids, GROUP_none, + f.pred, FPM_unused); gcall *call = f.redirect_call (instance); unsigned offset_index = 0; if (f.pred == PRED_m) @@ -805,7 +805,8 @@ public: { function_instance instance ("svlsr", functions::svlsr, shapes::binary_uint_opt_n, MODE_n, - f.type_suffix_ids, GROUP_none, f.pred); + f.type_suffix_ids, GROUP_none, f.pred, + FPM_unused); call = f.redirect_call (instance); tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? 
op2 : op2_cst; new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d)); @@ -818,7 +819,8 @@ public: function_instance instance ("svasrd", functions::svasrd, shapes::shift_right_imm, MODE_n, - f.type_suffix_ids, GROUP_none, f.pred); + f.type_suffix_ids, GROUP_none, f.pred, + FPM_unused); call = f.redirect_call (instance); new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t], tree_log2 (op2_cst)); @@ -2100,9 +2102,9 @@ public: negated_op = op2; if (!f.type_suffix (0).unsigned_p && negated_op) { - function_instance instance ("svneg", functions::svneg, - shapes::unary, MODE_none, - f.type_suffix_ids, GROUP_none, f.pred); + function_instance instance ("svneg", functions::svneg, shapes::unary, + MODE_none, f.type_suffix_ids, GROUP_none, + f.pred, FPM_unused); gcall *call = f.redirect_call (instance); unsigned offset_index = 0; if (f.pred == PRED_m) @@ -2143,7 +2145,8 @@ public: tree_log2 (shift_op2)); function_instance instance ("svlsl", functions::svlsl, shapes::binary_uint_opt_n, MODE_n, - f.type_suffix_ids, GROUP_none, f.pred); + f.type_suffix_ids, GROUP_none, f.pred, + FPM_unused); gcall *call = f.redirect_call (instance); gimple_call_set_arg (call, 1, shift_op1); gimple_call_set_arg (call, 2, shift_op2); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index 371507513c3b..ebe2e5817284 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -349,8 +349,8 @@ build_one (function_builder &b, const char *signature, /* Byte forms of svdupq take 16 arguments. 
*/ auto_vec<tree, 16> argument_types; function_instance instance (group.base_name, *group.base, *group.shape, - mode_suffix_id, group.types[ti], - group.groups[gi], group.preds[pi]); + mode_suffix_id, group.types[ti], group.groups[gi], + group.preds[pi], group.fpm_mode); tree return_type = parse_signature (instance, signature, argument_types); apply_predication (instance, return_type, argument_types); b.add_unique_function (instance, return_type, argument_types, diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index b17b78dadd5e..6bfc62bdce63 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -126,9 +126,9 @@ public: tree op1 = gimple_call_arg (f.call, 0); if (!integer_zerop (op1)) return NULL; - function_instance instance ("svabd", functions::svabd, - shapes::binary_opt_n, f.mode_suffix_id, - f.type_suffix_ids, GROUP_none, PRED_x); + function_instance instance ("svabd", functions::svabd, shapes::binary_opt_n, + f.mode_suffix_id, f.type_suffix_ids, GROUP_none, + PRED_x, FPM_unused); gcall *call = f.redirect_call (instance); /* Add a ptrue as predicate, because unlike svaba, svabd is predicated. */ @@ -512,7 +512,8 @@ public: that we can use for sensible shift amounts. */ function_instance instance ("svqshl", functions::svqshl, shapes::binary_int_opt_n, MODE_n, - f.type_suffix_ids, GROUP_none, f.pred); + f.type_suffix_ids, GROUP_none, f.pred, + FPM_unused); return f.redirect_call (instance); } else @@ -520,9 +521,9 @@ public: /* The saturation has no effect, and [SU]RSHL has immediate forms that we can use for sensible shift amounts. 
*/ function_instance instance ("svrshl", functions::svrshl, - shapes::binary_int_opt_single_n, - MODE_n, f.type_suffix_ids, GROUP_none, - f.pred); + shapes::binary_int_opt_single_n, MODE_n, + f.type_suffix_ids, GROUP_none, f.pred, + FPM_unused); return f.redirect_call (instance); } } @@ -551,7 +552,8 @@ public: -wi::to_wide (amount)); function_instance instance ("svasr", functions::svasr, shapes::binary_uint_opt_n, MODE_n, - f.type_suffix_ids, GROUP_none, f.pred); + f.type_suffix_ids, GROUP_none, f.pred, + FPM_unused); if (f.type_suffix (0).unsigned_p) { instance.base_name = "svlsr"; @@ -586,7 +588,8 @@ public: that we can use for sensible shift amounts. */ function_instance instance ("svlsl", functions::svlsl, shapes::binary_uint_opt_n, MODE_n, - f.type_suffix_ids, GROUP_none, f.pred); + f.type_suffix_ids, GROUP_none, f.pred, + FPM_unused); gcall *call = f.redirect_call (instance); gimple_call_set_arg (call, 2, amount); return call; @@ -599,7 +602,8 @@ public: -wi::to_wide (amount)); function_instance instance ("svrshr", functions::svrshr, shapes::shift_right_imm, MODE_n, - f.type_suffix_ids, GROUP_none, f.pred); + f.type_suffix_ids, GROUP_none, f.pred, + FPM_unused); gcall *call = f.redirect_call (instance); gimple_call_set_arg (call, 2, amount); return call; @@ -635,7 +639,8 @@ public: return NULL; function_instance instance ("svlsr", functions::svlsr, shapes::binary_uint_opt_n, MODE_n, - f.type_suffix_ids, GROUP_none, PRED_x); + f.type_suffix_ids, GROUP_none, PRED_x, + FPM_unused); if (!f.type_suffix (0).unsigned_p) { instance.base_name = "svasr"; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 1d966f43bf5f..bc8fd71083f4 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -933,9 +933,10 @@ static const predication_index preds_za_m[] = { PRED_za_m, NUM_PREDS }; /* A list of all arm_sve.h functions. 
*/ static CONSTEXPR const function_group_info function_groups[] = { -#define DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ +#define DEF_SVE_FUNCTION_GS_FPM(NAME, SHAPE, TYPES, GROUPS, PREDS, FPM_MODE) \ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ - preds_##PREDS, aarch64_required_extensions::REQUIRED_EXTENSIONS }, + preds_##PREDS, aarch64_required_extensions::REQUIRED_EXTENSIONS, \ + FPM_##FPM_MODE }, #include "aarch64-sve-builtins.def" }; @@ -943,7 +944,8 @@ static CONSTEXPR const function_group_info function_groups[] = { static CONSTEXPR const function_group_info neon_sve_function_groups[] = { #define DEF_NEON_SVE_FUNCTION(NAME, SHAPE, TYPES, GROUPS, PREDS) \ { #NAME, &neon_sve_bridge_functions::NAME, &shapes::SHAPE, types_##TYPES, \ - groups_##GROUPS, preds_##PREDS, aarch64_required_extensions::ssve (0) }, + groups_##GROUPS, preds_##PREDS, aarch64_required_extensions::ssve (0), \ + FPM_unused }, #include "aarch64-neon-sve-bridge-builtins.def" }; @@ -951,12 +953,13 @@ static CONSTEXPR const function_group_info neon_sve_function_groups[] = { static CONSTEXPR const function_group_info sme_function_groups[] = { #define DEF_SME_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ - preds_##PREDS, aarch64_required_extensions::REQUIRED_EXTENSIONS }, + preds_##PREDS, aarch64_required_extensions::REQUIRED_EXTENSIONS, \ + FPM_unused }, #define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ { #NAME, &functions::NAME##_za, &shapes::SHAPE, types_##TYPES, \ groups_##GROUPS, preds_##PREDS, \ aarch64_required_extensions::REQUIRED_EXTENSIONS \ - .and_also (AARCH64_FL_ZA_ON) }, + .and_also (AARCH64_FL_ZA_ON), FPM_unused }, #include "aarch64-sve-builtins-sme.def" }; @@ -1238,6 +1241,7 @@ function_instance::hash () const h.add_int (type_suffix_ids[1]); h.add_int (group_suffix_id); h.add_int (pred); + h.add_int (fpm_mode); return h.end (); } @@ -1668,7 
+1672,8 @@ function_builder::add_overloaded_functions (const function_group_info &group, { function_instance instance (group.base_name, *group.base, *group.shape, mode, types, - group_suffix_id, group.preds[pi]); + group_suffix_id, group.preds[pi], + group.fpm_mode); add_overloaded_function (instance, group.required_extensions); }; @@ -1845,8 +1850,8 @@ function_resolver::lookup_form (mode_suffix_index mode, group_suffix_index group) { type_suffix_pair types = { type0, type1 }; - function_instance instance (base_name, base, shape, mode, types, - group, pred); + function_instance instance (base_name, base, shape, mode, types, group, pred, + fpm_mode); registered_function *rfn = function_table->find_with_hash (instance, instance.hash ()); return rfn ? rfn->decl : NULL_TREE; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def index 47c396b866de..252c126dd39e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.def +++ b/gcc/config/aarch64/aarch64-sve-builtins.def @@ -37,8 +37,13 @@ #define DEF_SVE_GROUP_SUFFIX(A, B, C) #endif +#ifndef DEF_SVE_FUNCTION_GS_FPM +#define DEF_SVE_FUNCTION_GS_FPM(A, B, C, D, E, F) +#endif + #ifndef DEF_SVE_FUNCTION_GS -#define DEF_SVE_FUNCTION_GS(A, B, C, D, E) +#define DEF_SVE_FUNCTION_GS(A, B, C, D, E) \ + DEF_SVE_FUNCTION_GS_FPM(A, B, C, D, E, unused) #endif #ifndef DEF_SVE_NEON_TYPE_SUFFIX @@ -164,6 +169,7 @@ DEF_SVE_GROUP_SUFFIX (vg4x4, 4, 4) #undef DEF_SVE_FUNCTION #undef DEF_SVE_FUNCTION_GS +#undef DEF_SVE_FUNCTION_GS_FPM #undef DEF_SVE_GROUP_SUFFIX #undef DEF_SME_ZA_SUFFIX #undef DEF_SVE_NEON_TYPE_SUFFIX diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index d209aebe96e2..1d0ca39a6742 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -28,6 +28,7 @@ - the "mode" suffix ("_n", "_index", etc.) - the type suffixes ("_s32", "_b8", etc.) - the predication suffix ("_x", "_z", etc.) 
+ - the "_fpm" suffix when the floating point mode register is set Each piece of information is individually useful, so we retain this classification throughout: @@ -42,6 +43,8 @@ - prediction_index extends the predication suffix with an additional alternative: PRED_implicit for implicitly-predicated operations + - fpm_mode represents whether the fpm register is set or not + In addition to its unique full name, a function may have a shorter overloaded alias. This alias removes pieces of the suffixes that can be inferred from the arguments, such as by shortening the mode @@ -164,6 +167,14 @@ enum predication_index NUM_PREDS }; +/* Classifies intrinsics on whether they set the FPM register */ +enum fpm_mode_index +{ + FPM_unused, + FPM_set, + NUM_FPM_MODES +}; + /* Classifies element types, based on type suffixes with the bit count removed. "count" isn't really an element type, but we pretend it is for consistency. */ @@ -366,6 +377,9 @@ struct function_group_info /* The architecture extensions that the functions require. */ aarch64_required_extensions required_extensions; + + /* Whether the floating point register is set */ + fpm_mode_index fpm_mode; }; /* Describes a single fully-resolved function (i.e. 
one that has a @@ -376,7 +390,7 @@ public: function_instance (const char *, const function_base *, const function_shape *, mode_suffix_index, const type_suffix_pair &, group_suffix_index, - predication_index); + predication_index, fpm_mode_index); bool operator== (const function_instance &) const; bool operator!= (const function_instance &) const; @@ -420,6 +434,7 @@ public: type_suffix_pair type_suffix_ids; group_suffix_index group_suffix_id; predication_index pred; + fpm_mode_index fpm_mode; }; class registered_function; @@ -876,16 +891,15 @@ tuple_type_field (tree type) } inline function_instance:: -function_instance (const char *base_name_in, - const function_base *base_in, +function_instance (const char *base_name_in, const function_base *base_in, const function_shape *shape_in, mode_suffix_index mode_suffix_id_in, const type_suffix_pair &type_suffix_ids_in, group_suffix_index group_suffix_id_in, - predication_index pred_in) + predication_index pred_in, fpm_mode_index fpm_mode_in) : base_name (base_name_in), base (base_in), shape (shape_in), mode_suffix_id (mode_suffix_id_in), group_suffix_id (group_suffix_id_in), - pred (pred_in) + pred (pred_in), fpm_mode (fpm_mode_in) { memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids)); } @@ -899,7 +913,8 @@ function_instance::operator== (const function_instance &other) const && type_suffix_ids[0] == other.type_suffix_ids[0] && type_suffix_ids[1] == other.type_suffix_ids[1] && group_suffix_id == other.group_suffix_id - && pred == other.pred); + && pred == other.pred + && fpm_mode == other.fpm_mode); } inline bool