Currently we fail to optimize those which are used when MIN/MAX_EXPR cannot be used for FP values but the target has IEEE conforming implementations.
This patch adds support for fmin/fmax detection to phiopt and makes the named patterns available in the x86 backend. It also removes some superfluous restrictions from the SLP vectorizer to make SLP vectorizing IFN_FMIN/FMAX work. This also unblocks the fix for PR94865, restoring gcc.target/i386/pr54855-[89].c operation. Bootstrap & regtest running on x86_64-unknown-linux-gnu. The x86 portion did simplify a bit, is it still OK? I verified vectorization works for SSE, AVX and AVX512 and 3dnow (it doesn't work for v2sf w/o -m3dnow even though "3dnow-in-sse" would work) Thanks, Richard. 2020-05-08 Richard Biener <rguent...@suse.de> PR tree-optimization/88540 * tree-ssa-phiopt.c: Include internal-fn.h and gimple-match.h. (minmax_replacement): Parse IFN_FMIN/FMAX as MIN/MAX_EXPR. Code-generate IFN_FMIN/FMAX if honoring NaNs or signed zeros. * tree-vect-slp.c (vect_build_slp_tree_1): Remove superfluous tests on calls. * config/i386/i386.md (*ieee_s<ieee_maxmin><mode>3): Rename as (f<ieee_maxmin><mode>3): this. * config/i386/mmx.md (mmx_<code>v2sf3): Adjust. (mmx_ieee_<ieee_maxmin>v2sf3): Rename as (f<ieee_maxmin>v2sf3): this. * config/i386/sse.md (<code><mode>3<mask_name><round_saeonly_name>): Adjust. (ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>): Rename as (f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>): this. * gcc.target/i386/pr88540-1.c: New testcase. --- gcc/config/i386/i386.md | 2 +- gcc/config/i386/mmx.md | 4 +- gcc/config/i386/sse.md | 4 +- gcc/testsuite/gcc.target/i386/pr88540-1.c | 21 ++++++++++ gcc/tree-ssa-phiopt.c | 66 +++++++++++++++++++++++++------ gcc/tree-vect-slp.c | 3 -- 6 files changed, 80 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr88540-1.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8bfc9cb0b71..f1496a98456 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -18443,7 +18443,7 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "*ieee_s<ieee_maxmin><mode>3" +(define_insn "f<ieee_maxmin><mode>3" [(set (match_operand:MODEF 0 "register_operand" "=x,v") (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "0,v") diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 472f90f9bc1..422d858f25d 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -301,7 +301,7 @@ if (!flag_finite_math_only || flag_signed_zeros) { operands[1] = force_reg (V2SFmode, operands[1]); - emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3 + emit_insn (gen_f<maxmin_float>v2sf3 (operands[0], operands[1], operands[2])); DONE; } @@ -331,7 +331,7 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "mmx_ieee_<ieee_maxmin>v2sf3" +(define_insn "f<ieee_maxmin>v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7a7ecd4be87..ac350711527 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2191,7 +2191,7 @@ if (!flag_finite_math_only || flag_signed_zeros) { operands[1] = force_reg (<MODE>mode, operands[1]); - emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name> + emit_insn (gen_f<maxmin_float><mode>3<mask_name><round_saeonly_name> (operands[0], operands[1], operands[2] <mask_operand_arg34> <round_saeonly_mask_arg3>)); @@ -2229,7 +2229,7 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>" +(define_insn "f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (unspec:VF [(match_operand:VF 1 "register_operand" "0,v") diff --git a/gcc/testsuite/gcc.target/i386/pr88540-1.c b/gcc/testsuite/gcc.target/i386/pr88540-1.c new file mode 100644 index 00000000000..a66b89ef66c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88540-1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +void test1(double* __restrict d1, double* __restrict d2, double* __restrict d3) +{ + for (int n = 0; n < 2; ++n) + { + d3[n] = d1[n] < d2[n] ? d1[n] : d2[n]; + } +} + +void test2(double* __restrict d1, double* __restrict d2, double* __restrict d3) +{ + for (int n = 0; n < 2; ++n) + { + d3[n] = d1[n] > d2[n] ? d1[n] : d2[n]; + } +} + +/* { dg-final { scan-assembler "minpd" } } */ +/* { dg-final { scan-assembler "maxpd" } } */ diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index b1e0dce93d8..7ceda4a37ab 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-inline.h" #include "case-cfn-macros.h" #include "tree-eh.h" +#include "internal-fn.h" +#include "gimple-match.h" static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); static bool two_value_replacement (basic_block, basic_block, edge, gphi *, @@ -1364,7 +1366,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, { tree result, type, rhs; gcond *cond; - gassign *new_stmt; edge true_edge, false_edge; enum tree_code cmp, minmax, ass_code; tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false; @@ -1373,7 +1374,10 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, type = TREE_TYPE (PHI_RESULT (phi)); /* The optimization may be unsafe due to NaNs. */ - if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) + if ((HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) + && (!direct_internal_fn_supported_p (IFN_FMIN, type, OPTIMIZE_FOR_BOTH) + || !direct_internal_fn_supported_p (IFN_FMAX, type, + OPTIMIZE_FOR_BOTH))) return false; cond = as_a <gcond *> (last_stmt (cond_bb)); @@ -1530,16 +1534,44 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, gimple *assign = last_and_only_stmt (middle_bb); tree lhs, op0, op1, bound; - if (!assign - || gimple_code (assign) != GIMPLE_ASSIGN) + if (!assign) return false; - lhs = gimple_assign_lhs (assign); - ass_code = gimple_assign_rhs_code (assign); - if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) + if (is_gimple_assign (assign)) + { + lhs = gimple_assign_lhs (assign); + ass_code = gimple_assign_rhs_code (assign); + if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) + return false; + op0 = gimple_assign_rhs1 (assign); + op1 = gimple_assign_rhs2 (assign); + } + else if (is_gimple_call (assign)) + { + lhs = gimple_call_lhs (assign); + if (!lhs) + return false; + combined_fn cfn = gimple_call_combined_fn (assign); + switch (cfn) + { + CASE_CFN_FMIN: + CASE_CFN_FMIN_FN: + ass_code = MIN_EXPR; + op0 = gimple_call_arg (assign, 0); + op1 = gimple_call_arg (assign, 1); + break; + CASE_CFN_FMAX: + CASE_CFN_FMAX_FN: + ass_code = MAX_EXPR; + op0 = gimple_call_arg (assign, 0); + op1 = gimple_call_arg (assign, 1); + break; + default: + return false; + } + } + else return false; - op0 = gimple_assign_rhs1 (assign); - op1 = gimple_assign_rhs2 (assign); if (true_edge->src == middle_bb) { @@ -1574,8 +1606,9 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, return false; /* We need BOUND <= LARGER. */ - if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, - bound, larger))) + if (bound != larger + && !integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, + bound, larger))) return false; } else if (operand_equal_for_phi_arg_p (arg_false, smaller) @@ -1698,7 +1731,16 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, result = make_ssa_name (TREE_TYPE (result)); /* Emit the statement to compute min/max. */ - new_stmt = gimple_build_assign (result, minmax, arg0, arg1); + gimple *new_stmt; + if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) + { + new_stmt = gimple_build_call_internal (minmax == MAX_EXPR + ? IFN_FMAX : IFN_FMIN, + 2, arg0, arg1); + gimple_call_set_lhs (new_stmt, result); + } + else + new_stmt = gimple_build_assign (result, minmax, arg0, arg1); gsi = gsi_last_bb (cond_bb); gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index c097840b09a..b1dcaec3d9f 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -849,9 +849,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, else if ((gimple_call_internal_p (call_stmt) && (!vectorizable_internal_fn_p (gimple_call_internal_fn (call_stmt)))) - || gimple_call_tail_p (call_stmt) - || gimple_call_noreturn_p (call_stmt) - || !gimple_call_nothrow_p (call_stmt) || gimple_call_chain (call_stmt)) { if (dump_enabled_p ()) -- 2.13.7