Claudio Bantaloukas <claudio.bantalou...@arm.com> writes: > This patch adds support for the following intrinsics: > - svdot[_f32_mf8]_fpm > - svdot_lane[_f32_mf8]_fpm > - svdot[_f16_mf8]_fpm > - svdot_lane[_f16_mf8]_fpm > > The first two are available under a combination of the FP8DOT4 and SVE2 > features. > Alternatively under the SSVE_FP8DOT4 feature under streaming mode. > The final two are available under a combination of the FP8DOT2 and SVE2 > features. > Alternatively under the SSVE_FP8DOT2 feature under streaming mode.
Some of the comments from the previous patches apply here too (e.g. the boilerplate at the start of the tests, and testing the highest in-range index). It looks like the patch is missing a change to doc/invoke.texi. Otherwise it's just banal trivia, sorry: > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc > b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > index 022163f0726..65df48a3e65 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc > @@ -835,21 +835,28 @@ public: > rtx > expand (function_expander &e) const override > { > - /* In the optab, the multiplication operands come before the accumulator > - operand. The optab is keyed off the multiplication mode. */ > - e.rotate_inputs_left (0, 3); > insn_code icode; > - if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES) > - icode = e.convert_optab_handler_for_sign (sdot_prod_optab, > - udot_prod_optab, > - 0, e.result_mode (), > - GET_MODE (e.args[0])); > + if (e.fpm_mode == aarch64_sve::FPM_set) > + { > + icode = code_for_aarch64_sve_dot (e.result_mode ()); > + } Formatting nit, but: no braces around single statements, with the body then being indented by 2 spaces relative to the "if". > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc > b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc > index 09f343e7118..9f79f6e28c7 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc > @@ -3994,6 +3994,34 @@ struct ternary_bfloat_def > }; > SHAPE (ternary_bfloat) > > +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t). */ > +struct ternary_mfloat8_def > + : public ternary_resize2_base<8, TYPE_mfloat, TYPE_mfloat> > +{ > + void > + build (function_builder &b, const function_group_info &group) const > override > + { > + gcc_assert (group.fpm_mode == FPM_set); > + b.add_overloaded_functions (group, MODE_none); > + build_all (b, "v0,v0,vM,vM", group, MODE_none); > + } > + > + tree > + resolve (function_resolver &r) const override > + { > + type_suffix_index type; > + if (!r.check_num_arguments (4) > + || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES > + || !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t) > + || !r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t) > + || !r.require_scalar_type (3, "int64_t")) uint64_t > + return error_mark_node; > + > + return r.resolve_to (r.mode_suffix_id, type, TYPE_SUFFIX_mf8, > GROUP_none); > + } > +}; > +SHAPE (ternary_mfloat8) > + > /* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t) > > where the final argument is an integer constant expression in the range > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def > b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def > index c84c153e913..7d90e3b5e20 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def > +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def > @@ -363,3 +363,15 @@ DEF_SVE_FUNCTION_GS_FPM (svmlallbb_lane, > ternary_mfloat8_lane, s_float_mf8, none > DEF_SVE_FUNCTION_GS_FPM (svmlallbt_lane, ternary_mfloat8_lane, s_float_mf8, > none, none, set) > DEF_SVE_FUNCTION_GS_FPM (svmlalltb_lane, ternary_mfloat8_lane, s_float_mf8, > none, none, set) > #undef REQUIRED_EXTENSIONS > + > +#define REQUIRED_EXTENSIONS \ > + streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT4, > AARCH64_FL_SSVE_FP8DOT4) Elsewhere we've been putting the non-streaming and streaming requirements on separate lines if the whole thing doesn't fit on one line: #define REQUIRED_EXTENSIONS \ streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT4, \ AARCH64_FL_SSVE_FP8DOT4) Same below. Looks good to me otherwise, thanks. Richard