In PR 105965 we accepted a request to form FMA instructions when the source code uses a narrow generic vector that contains just one element, corresponding to V1SF or V1DF mode, even though the backend does not expand FMA patterns for such modes.
For this to work under -ffp-contract=on, we either need to modify backends, or emulate such degenerate-vector FMA via scalar FMA in tree-vect-generic. Do the latter. gcc/c-family/ChangeLog: * c-gimplify.cc (fma_supported_p): Allow forming single-element vector FMA when scalar FMA is available. (c_gimplify_expr): Allow vector types. gcc/ChangeLog: * tree-vect-generic.cc (expand_vec1_fma): New helper. Use it... (expand_vector_operations_1): ... here to handle IFN_FMA. --- gcc/c-family/c-gimplify.cc | 10 ++++++-- gcc/tree-vect-generic.cc | 48 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc index c6fb764656..1942d5019e 100644 --- a/gcc/c-family/c-gimplify.cc +++ b/gcc/c-family/c-gimplify.cc @@ -875,7 +875,13 @@ c_build_bind_expr (location_t loc, tree block, tree body) static bool fma_supported_p (enum internal_fn fn, tree type) { - return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH); + return (direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH) + /* Accept single-element vector FMA (see PR 105965) when the + backend handles the scalar but not the vector mode. */ + || (VECTOR_TYPE_P (type) + && known_eq (TYPE_VECTOR_SUBPARTS (type), 1U) + && direct_internal_fn_supported_p (fn, TREE_TYPE (type), + OPTIMIZE_FOR_BOTH))); } /* Gimplification of expression trees. */ @@ -939,7 +945,7 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED, /* For -ffp-contract=on we need to attempt FMA contraction only during initial gimplification. Late contraction across statement boundaries would violate language semantics. 
*/ - if (SCALAR_FLOAT_TYPE_P (type) + if ((SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)) && flag_fp_contract_mode == FP_CONTRACT_ON && cfun && !(cfun->curr_properties & PROP_gimple_any) && fma_supported_p (IFN_FMA, type)) diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index 3c68361870..954b84edce 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1983,6 +1983,36 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) gsi_replace (gsi, g, false); } +/* Expand IFN_FMA, assuming vector contains just one scalar. + c_gimplify_expr can introduce it when performing FMA contraction. */ + +static void +expand_vec1_fma (gimple_stmt_iterator *gsi) +{ + gcall *call = as_a <gcall *> (gsi_stmt (*gsi)); + tree type = TREE_TYPE (gimple_call_arg (call, 0)); + if (!VECTOR_TYPE_P (type)) + return; + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), 1U)); + + for (int i = 0; i < 3; i++) + { + tree arg = gimple_call_arg (call, i); + arg = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (type), arg); + gimple_call_set_arg (call, i, arg); + } + tree lhs = gimple_call_lhs (call); + if (lhs) + { + tree new_lhs = make_ssa_name (TREE_TYPE (type)); + gimple_call_set_lhs (call, new_lhs); + tree ctor = build_constructor_single (type, 0, new_lhs); + gimple *g = gimple_build_assign (lhs, CONSTRUCTOR, ctor); + gsi_insert_after (gsi, g, GSI_NEW_STMT); + } + update_stmt (call); +} + /* Process one statement. If we identify a vector operation, expand it. 
*/ static void @@ -1998,8 +2028,22 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi) gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi)); if (!stmt) { - if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT)) - expand_vector_conversion (gsi); + gcall *call = dyn_cast <gcall *> (gsi_stmt (*gsi)); + if (!call || !gimple_call_internal_p (call)) + return; + switch (gimple_call_internal_fn (call)) + { + case IFN_VEC_CONVERT: + return expand_vector_conversion (gsi); + case IFN_FMA: + case IFN_FMS: + case IFN_FNMA: + case IFN_FNMS: + if (!direct_internal_fn_supported_p (call, OPTIMIZE_FOR_BOTH)) + return expand_vec1_fma (gsi); + default: + break; + } return; } -- 2.49.0