On 21/11/2024 15:41, Richard Sandiford wrote:
Claudio Bantaloukas <claudio.bantalou...@arm.com> writes:
This patch adds support for the following intrinsics:
- svdot[_f32_mf8]_fpm
- svdot_lane[_f32_mf8]_fpm
- svdot[_f16_mf8]_fpm
- svdot_lane[_f16_mf8]_fpm
The first two are available under a combination of the FP8DOT4 and SVE2
features.
Alternatively, they are available under the SSVE_FP8DOT4 feature in streaming mode.
The final two are available under a combination of the FP8DOT2 and SVE2
features.
Alternatively, they are available under the SSVE_FP8DOT2 feature in streaming mode.
Some of the comments from the previous patches apply here too
(e.g. the boilerplate at the start of the tests, and testing the
highest in-range index).
Done
It looks like the patch is missing a change to doc/invoke.texi.
Done
Otherwise it's just banal trivia, sorry:
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 022163f0726..65df48a3e65 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -835,21 +835,28 @@ public:
rtx
expand (function_expander &e) const override
{
- /* In the optab, the multiplication operands come before the accumulator
- operand. The optab is keyed off the multiplication mode. */
- e.rotate_inputs_left (0, 3);
insn_code icode;
- if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES)
- icode = e.convert_optab_handler_for_sign (sdot_prod_optab,
- udot_prod_optab,
- 0, e.result_mode (),
- GET_MODE (e.args[0]));
+ if (e.fpm_mode == aarch64_sve::FPM_set)
+ {
+ icode = code_for_aarch64_sve_dot (e.result_mode ());
+ }
Formatting nit, but: no braces around single statements, with the body
then being indented by 2 spaces relative to the "if".
Done
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 09f343e7118..9f79f6e28c7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -3994,6 +3994,34 @@ struct ternary_bfloat_def
};
SHAPE (ternary_bfloat)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t). */
+struct ternary_mfloat8_def
+ : public ternary_resize2_base<8, TYPE_mfloat, TYPE_mfloat>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ gcc_assert (group.fpm_mode == FPM_set);
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vM,vM", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ type_suffix_index type;
+ if (!r.check_num_arguments (4)
+ || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES
+ || !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t)
+ || !r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t)
+ || !r.require_scalar_type (3, "int64_t"))
This should be "uint64_t" rather than "int64_t".
Done
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type, TYPE_SUFFIX_mf8, GROUP_none);
+ }
+};
+SHAPE (ternary_mfloat8)
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
where the final argument is an integer constant expression in the range
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index c84c153e913..7d90e3b5e20 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -363,3 +363,15 @@ DEF_SVE_FUNCTION_GS_FPM (svmlallbb_lane,
ternary_mfloat8_lane, s_float_mf8, none
DEF_SVE_FUNCTION_GS_FPM (svmlallbt_lane, ternary_mfloat8_lane, s_float_mf8,
none, none, set)
DEF_SVE_FUNCTION_GS_FPM (svmlalltb_lane, ternary_mfloat8_lane, s_float_mf8,
none, none, set)
#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS \
+ streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT4,
AARCH64_FL_SSVE_FP8DOT4)
Elsewhere we've been putting the non-streaming and streaming requirements
on separate lines if the whole thing doesn't fit on one line:
#define REQUIRED_EXTENSIONS \
streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT4, \
AARCH64_FL_SSVE_FP8DOT4)
Same below.
Done
Looks good to me otherwise, thanks.
Richard