Hi,

This patch addresses PR70841, where reassoc fails to handle FP division. I think the best way to handle this is to follow what is done for MINUS_EXPR, i.e., convert the RDIV_EXPR into a MULT_EXPR by (1/x) early, and then optimize it later in opt_rdiv_with_multiply.
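
To make that concrete, here is a rough sketch (plain C with illustrative temporaries, not the actual GIMPLE) of what the two steps do to the second function in the new test case:

/* Original:             t = y / x;  return t * x;
   After break_up_rdiv:  the division becomes a multiply by a
   reciprocal, so reassociation sees the MULT_EXPR operand list
   { y, t1, x } with t1 = 1/x; opt_rdiv_with_multiply then spots
   the { t1, x } pair and removes it, leaving just y.  */
float
sketch (float x, float y)
{
  float t1 = 1.0f / x;  /* reciprocal introduced by break_up_rdiv  */
  float t = y * t1;     /* the division, rewritten as a multiply  */
  return t * x;         /* t1 and x cancel during reassociation  */
}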

Here is a patch that bootstraps and passes regression testing on x86_64-linux-gnu.

Does this look OK for trunk?

Thanks,
Kugan

gcc/testsuite/ChangeLog:

2016-05-05  Kugan Vivekanandarajah  <kug...@linaro.org>

        PR middle-end/70841
        * gcc.dg/tree-ssa/pr70841.c: New test.

gcc/ChangeLog:

2016-05-05  Kugan Vivekanandarajah  <kug...@linaro.org>

        PR middle-end/70841
        * tree-ssa-reassoc.c (should_break_up_rdiv): New function.
        (break_up_rdiv): New function.
        (break_up_subtract_bb): Rename to break_up_subtract_and_div_bb;
        call should_break_up_rdiv and break_up_rdiv.
        (sort_cmp_int): New function.
        (opt_rdiv_with_multiply): New function.
        (reassociate_bb): Call opt_rdiv_with_multiply.
        (do_reassoc): Adjust call to renamed break_up_subtract_and_div_bb.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr70841.c b/gcc/testsuite/gcc.dg/tree-ssa/pr70841.c
index e69de29..0b456aa 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr70841.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr70841.c
@@ -0,0 +1,15 @@
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -freciprocal-math -fdump-tree-optimized" } */
+
+float foo (float x, float y)
+{
+    return x * y / x;
+}
+
+float foo2 (float x, float y)
+{
+    return (y / x) * x ;
+}
+
+/* { dg-final { scan-tree-dump-times "return y_" 2 "optimized" } } */
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index d23dabd..29a5422 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -4168,6 +4168,40 @@ should_break_up_subtract (gimple *stmt)
   return false;
 }
 
+/* Return true if we should break up the RDIV in STMT into a MULT_EXPR
+   with the reciprocal.  */
+
+static bool
+should_break_up_rdiv (gimple *stmt)
+{
+  tree lhs = gimple_assign_lhs (stmt);
+  tree binlhs = gimple_assign_rhs1 (stmt);
+  tree binrhs = gimple_assign_rhs2 (stmt);
+  gimple *immusestmt;
+
+  if (VECTOR_TYPE_P (TREE_TYPE (lhs)))
+    return false;
+
+  if (TREE_CODE (lhs) == SSA_NAME
+      && (immusestmt = get_single_immediate_use (lhs))
+      && is_gimple_assign (immusestmt)
+      && gimple_assign_rhs_code (immusestmt) == MULT_EXPR)
+    return true;
+  if (TREE_CODE (binlhs) == SSA_NAME
+      && (immusestmt = SSA_NAME_DEF_STMT (binlhs))
+      && get_single_immediate_use (binlhs)
+      && is_gimple_assign (immusestmt)
+      && gimple_assign_rhs_code (immusestmt) == MULT_EXPR)
+    return true;
+  if (TREE_CODE (binrhs) == SSA_NAME
+      && (immusestmt = SSA_NAME_DEF_STMT (binrhs))
+      && get_single_immediate_use (binrhs)
+      && is_gimple_assign (immusestmt)
+      && gimple_assign_rhs_code (immusestmt) == MULT_EXPR)
+    return true;
+  return false;
+}
+
 /* Transform STMT from A - B into A + -B.  */
 
 static void
@@ -4187,6 +4221,23 @@ break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
   update_stmt (stmt);
 }
 
+/* Transform STMT from A / B into A * (1/B).  */
+
+static void
+break_up_rdiv (gimple *stmt, gimple_stmt_iterator *gsip)
+{
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  tree tmp = make_ssa_name (TREE_TYPE (rhs1));
+  tree one = build_one_cst (TREE_TYPE (rhs1));
+  gassign *div_stmt = gimple_build_assign (tmp, RDIV_EXPR, one, rhs2);
+  gimple_set_uid (div_stmt, gimple_uid (stmt));
+  gsi_insert_before (gsip, div_stmt, GSI_NEW_STMT);
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gimple_assign_set_rhs_with_ops (&gsi, MULT_EXPR, rhs1, tmp);
+  update_stmt (stmt);
+}
+
 /* Determine whether STMT is a builtin call that raises an SSA name
    to an integer power and has only one use.  If so, and this is early
    reassociation and unsafe math optimizations are permitted, place
@@ -4492,7 +4543,7 @@ can_reassociate_p (tree op)
    and set UIDs within each basic block.  */
 
 static void
-break_up_subtract_bb (basic_block bb)
+break_up_subtract_and_div_bb (basic_block bb)
 {
   gimple_stmt_iterator gsi;
   basic_block son;
@@ -4522,6 +4573,15 @@ break_up_subtract_bb (basic_block bb)
          if (should_break_up_subtract (stmt))
            break_up_subtract (stmt, &gsi);
        }
+      else if (flag_reciprocal_math
+               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
+       {
+         if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
+             || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
+           continue;
+         if (should_break_up_rdiv (stmt))
+           break_up_rdiv (stmt, &gsi);
+       }
       else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
               && can_reassociate_p (gimple_assign_rhs1 (stmt)))
        plus_negates.safe_push (gimple_assign_lhs (stmt));
@@ -4529,7 +4589,7 @@ break_up_subtract_bb (basic_block bb)
   for (son = first_dom_son (CDI_DOMINATORS, bb);
        son;
        son = next_dom_son (CDI_DOMINATORS, son))
-    break_up_subtract_bb (son);
+    break_up_subtract_and_div_bb (son);
 }
 
 /* Used for repeated factor analysis.  */
@@ -5015,6 +5075,76 @@ transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
     }
 }
 
+/* Helper to sort ints in descending order.  */
+
+static int
+sort_cmp_int (const void *pa, const void *pb)
+{
+  const int a = *(const int *)pa;
+  const int b = *(const int *)pb;
+  return b - a;
+}
+
+/* For -freciprocal-math, remove cancelling pairs of (x) and (1/x) from
+   the MULT_EXPR operand list OPS.  Return true if OPS changed.  */
+
+static bool
+opt_rdiv_with_multiply (vec<operand_entry *> *ops)
+{
+  bool changed = false;
+  vec<int> indxs = vNULL;
+  /* FIXME: Since we do O(n^2) comparisons, skip this optimization if we
+     have too many operands.  Such long operand lists are rare in practice.  */
+  if (ops->length () > 10)
+    return false;
+
+  for (unsigned int i = 0; i < ops->length (); ++i)
+    {
+      tree op = (*ops)[i]->op;
+      if (TREE_CODE (op) != SSA_NAME
+         || !is_gimple_assign (SSA_NAME_DEF_STMT (op)))
+       continue;
+
+      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
+      enum tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
+      tree rhs1 = gimple_assign_rhs1 (def_stmt);
+      tree rhs2 = gimple_assign_rhs2 (def_stmt);
+
+      if (rhs_code == RDIV_EXPR
+         && TREE_CODE (rhs1) == REAL_CST
+         && real_equal (&TREE_REAL_CST (rhs1), &dconst1))
+       {
+          for (unsigned int j = 0; j < ops->length (); ++j)
+            {
+              tree op2 = (*ops)[j]->op;
+              /* Pair each reciprocal with at most one matching operand
+                 and never reuse an index already marked for removal.  */
+              if (op2 == rhs2
+                  && !indxs.contains (i)
+                  && !indxs.contains (j))
+                {
+                  indxs.safe_push (i);
+                  indxs.safe_push (j);
+                  break;
+                }
+            }
+       }
+    }
+
+  if (indxs.length () > 0)
+    {
+      changed = true;
+      /* Sort the indices in descending order and remove from the back,
+         so that earlier removals cannot invalidate a pending index.  */
+      indxs.qsort (sort_cmp_int);
+      for (unsigned int i = 0; i < indxs.length (); ++i)
+        ops->unordered_remove (indxs[i]);
+    }
+
+  indxs.release ();
+  return changed;
+}
+
 /* Reassociate expressions in basic block BB and its post-dominator as
    children.
 
@@ -5110,6 +5240,11 @@ reassociate_bb (basic_block bb)
                  optimize_ops_list (rhs_code, &ops);
                }
 
+             if (rhs_code == MULT_EXPR
+                 && flag_reciprocal_math
+                 && opt_rdiv_with_multiply (&ops))
+               ops.qsort (sort_by_operand_rank);
+
              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
@@ -5308,7 +5443,7 @@ debug_ops_vector (vec<operand_entry *> ops)
 static bool
 do_reassoc (void)
 {
-  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+  break_up_subtract_and_div_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
   return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
 }
 

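P.S. a note for reviewers on the removal order in opt_rdiv_with_multiply: unordered_remove replaces the removed slot with the last element, so the collected indices have to be deleted largest-first or a pending index can end up pointing past the shrunken vector. A minimal standalone model of that invariant (plain C++; the names and std::vector stand in for the GCC vec API and are mine, not the patch's):

#include <algorithm>
#include <cstdio>
#include <vector>

/* Model of vec::unordered_remove: move the last element into slot I
   and shrink by one.  O(1), but does not preserve order.  */
static void
unordered_remove (std::vector<int> &v, size_t i)
{
  v[i] = v.back ();
  v.pop_back ();
}

int
main ()
{
  std::vector<int> ops = { 10, 20, 30, 40, 50 };
  std::vector<size_t> idxs = { 1, 4 };  /* slots to delete  */

  /* Largest first: slot 4 goes away before anything moves, so slot 1
     is still valid afterwards.  Smallest first would move 50 into
     slot 1, after which index 4 is out of range.  */
  std::sort (idxs.rbegin (), idxs.rend ());
  for (size_t k = 0; k < idxs.size (); ++k)
    unordered_remove (ops, idxs[k]);

  for (size_t k = 0; k < ops.size (); ++k)
    printf ("%d ", ops[k]);  /* prints: 10 40 30  */
  printf ("\n");
  return 0;
}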