Currently we fail to optimize those which are used when MIN/MAX_EXPR
cannot be used for FP values but the target has IEEE conforming
implementations.

This patch adds support for fmin/fmax detection to phiopt and
makes the named patterns available in the x86 backend.  It also
removes some superfluous restrictions from the SLP vectorizer
to make SLP vectorizing IFN_FMIN/FMAX work.

This also unblocks the fix for PR94865, restoring 
gcc.target/i386/pr54855-[89].c operation.

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

The x86 portion did simplify a bit, is it still OK?  I verified
vectorization works for SSE, AVX and AVX512 and 3dnow (it doesn't
work for v2sf w/o -m3dnow even though "3dnow-in-sse" would work)

Thanks,
Richard.

2020-05-08  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/88540
        * tree-ssa-phiopt.c: Include internal-fn.h and gimple-match.h.
        (minmax_replacement): Parse IFN_FMIN/FMAX as MIN/MAX_EXPR.
        Code-generate IFN_FMIN/FMAX if honoring NaNs or signed zeros.
        * tree-vect-slp.c (vect_build_slp_tree_1): Remove superfluous
        tests on calls.

        * config/i386/i386.md (*ieee_s<ieee_maxmin><mode>3): Rename as
        (f<ieee_maxmin><mode>3): this.
        * config/i386/mmx.md (mmx_<code>v2sf3): Adjust.
        (mmx_ieee_<ieee_maxmin>v2sf3): Rename as
        (f<ieee_maxmin>v2sf3): this.
        * config/i386/sse.md
        (<code><mode>3<mask_name><round_saeonly_name>): Adjust.
        (ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>):
        Rename as
        (f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>): this.

        * gcc.target/i386/pr88540-1.c: New testcase.
---
 gcc/config/i386/i386.md                   |  2 +-
 gcc/config/i386/mmx.md                    |  4 +-
 gcc/config/i386/sse.md                    |  4 +-
 gcc/testsuite/gcc.target/i386/pr88540-1.c | 21 ++++++++++
 gcc/tree-ssa-phiopt.c                     | 66 +++++++++++++++++++++++++------
 gcc/tree-vect-slp.c                       |  3 --
 6 files changed, 80 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88540-1.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8bfc9cb0b71..f1496a98456 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -18443,7 +18443,7 @@
 ;; Their operands are not commutative, and thus they may be used in the
 ;; presence of -0.0 and NaN.
 
-(define_insn "*ieee_s<ieee_maxmin><mode>3"
+(define_insn "f<ieee_maxmin><mode>3"
   [(set (match_operand:MODEF 0 "register_operand" "=x,v")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "0,v")
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 472f90f9bc1..422d858f25d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -301,7 +301,7 @@
   if (!flag_finite_math_only || flag_signed_zeros)
     {
       operands[1] = force_reg (V2SFmode, operands[1]);
-      emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
+      emit_insn (gen_f<maxmin_float>v2sf3
                 (operands[0], operands[1], operands[2]));
       DONE;
     }
@@ -331,7 +331,7 @@
 ;; Their operands are not commutative, and thus they may be used in the
 ;; presence of -0.0 and NaN.
 
-(define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
+(define_insn "f<ieee_maxmin>v2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y")
         (unspec:V2SF
          [(match_operand:V2SF 1 "register_operand" "0")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7a7ecd4be87..ac350711527 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2191,7 +2191,7 @@
   if (!flag_finite_math_only || flag_signed_zeros)
     {
       operands[1] = force_reg (<MODE>mode, operands[1]);
-      emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
+      emit_insn (gen_f<maxmin_float><mode>3<mask_name><round_saeonly_name>
                 (operands[0], operands[1], operands[2]
                  <mask_operand_arg34>
                  <round_saeonly_mask_arg3>));
@@ -2229,7 +2229,7 @@
 ;; Their operands are not commutative, and thus they may be used in the
 ;; presence of -0.0 and NaN.
 
-(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
+(define_insn "f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
   [(set (match_operand:VF 0 "register_operand" "=x,v")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,v")
diff --git a/gcc/testsuite/gcc.target/i386/pr88540-1.c 
b/gcc/testsuite/gcc.target/i386/pr88540-1.c
new file mode 100644
index 00000000000..a66b89ef66c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88540-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+void test1(double* __restrict d1, double* __restrict d2, double* __restrict d3)
+{
+  for (int n = 0; n < 2; ++n)
+    {
+      d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
+    }
+}
+
+void test2(double* __restrict d1, double* __restrict d2, double* __restrict d3)
+{
+  for (int n = 0; n < 2; ++n)
+    {
+      d3[n] = d1[n] > d2[n] ? d1[n] : d2[n];
+    }
+}
+
+/* { dg-final { scan-assembler "minpd" } } */
+/* { dg-final { scan-assembler "maxpd" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index b1e0dce93d8..7ceda4a37ab 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-inline.h"
 #include "case-cfn-macros.h"
 #include "tree-eh.h"
+#include "internal-fn.h"
+#include "gimple-match.h"
 
 static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
 static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
@@ -1364,7 +1366,6 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
 {
   tree result, type, rhs;
   gcond *cond;
-  gassign *new_stmt;
   edge true_edge, false_edge;
   enum tree_code cmp, minmax, ass_code;
   tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
@@ -1373,7 +1374,10 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
   type = TREE_TYPE (PHI_RESULT (phi));
 
   /* The optimization may be unsafe due to NaNs.  */
-  if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+  if ((HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+      && (!direct_internal_fn_supported_p (IFN_FMIN, type, OPTIMIZE_FOR_BOTH)
+         || !direct_internal_fn_supported_p (IFN_FMAX, type,
+                                             OPTIMIZE_FOR_BOTH)))
     return false;
 
   cond = as_a <gcond *> (last_stmt (cond_bb));
@@ -1530,16 +1534,44 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
       gimple *assign = last_and_only_stmt (middle_bb);
       tree lhs, op0, op1, bound;
 
-      if (!assign
-         || gimple_code (assign) != GIMPLE_ASSIGN)
+      if (!assign)
        return false;
 
-      lhs = gimple_assign_lhs (assign);
-      ass_code = gimple_assign_rhs_code (assign);
-      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
+      if (is_gimple_assign (assign))
+       {
+         lhs = gimple_assign_lhs (assign);
+         ass_code = gimple_assign_rhs_code (assign);
+         if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
+           return false;
+         op0 = gimple_assign_rhs1 (assign);
+         op1 = gimple_assign_rhs2 (assign);
+       }
+      else if (is_gimple_call (assign))
+       {
+         lhs = gimple_call_lhs (assign);
+         if (!lhs)
+           return false;
+         combined_fn cfn = gimple_call_combined_fn (assign);
+         switch (cfn)
+           {
+           CASE_CFN_FMIN:
+           CASE_CFN_FMIN_FN:
+             ass_code = MIN_EXPR;
+             op0 = gimple_call_arg (assign, 0);
+             op1 = gimple_call_arg (assign, 1);
+             break;
+           CASE_CFN_FMAX:
+           CASE_CFN_FMAX_FN:
+             ass_code = MAX_EXPR;
+             op0 = gimple_call_arg (assign, 0);
+             op1 = gimple_call_arg (assign, 1);
+             break;
+           default:
+             return false;
+           }
+       }
+      else
        return false;
-      op0 = gimple_assign_rhs1 (assign);
-      op1 = gimple_assign_rhs2 (assign);
 
       if (true_edge->src == middle_bb)
        {
@@ -1574,8 +1606,9 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
                return false;
 
              /* We need BOUND <= LARGER.  */
-             if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
-                                                 bound, larger)))
+             if (bound != larger
+                 && !integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
+                                                    bound, larger)))
                return false;
            }
          else if (operand_equal_for_phi_arg_p (arg_false, smaller)
@@ -1698,7 +1731,16 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
     result = make_ssa_name (TREE_TYPE (result));
 
   /* Emit the statement to compute min/max.  */
-  new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
+  gimple *new_stmt;
+  if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+    {
+      new_stmt = gimple_build_call_internal (minmax == MAX_EXPR
+                                            ? IFN_FMAX : IFN_FMIN,
+                                            2, arg0, arg1);
+      gimple_call_set_lhs (new_stmt, result);
+    }
+  else
+    new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
   gsi = gsi_last_bb (cond_bb);
   gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
 
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index c097840b09a..b1dcaec3d9f 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -849,9 +849,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
          else if ((gimple_call_internal_p (call_stmt)
                    && (!vectorizable_internal_fn_p
                        (gimple_call_internal_fn (call_stmt))))
-                  || gimple_call_tail_p (call_stmt)
-                  || gimple_call_noreturn_p (call_stmt)
-                  || !gimple_call_nothrow_p (call_stmt)
                   || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
-- 
2.13.7

Reply via email to