As Richard's comment, this patch is composed to simplify generalized
binary-with-convert pattern like ((T) X) OP ((T) Y). Instead of creating
almost duplicated rules into match.pd, we try to transform it to (T) (X OP Y),
and apply simplification on (X OP Y) in forwprop pass.

Regards,
Feng
---
2020-08-19  Feng Xue  <f...@os.amperecomputing.com>

gcc/
        PR tree-optimization/94234
        * tree-ssa-forwprop.c (simplify_binary_with_convert): New function.
        * (fwprop_ssa_val): Move it before its new caller.

gcc/testsuite/
        PR tree-optimization/94234
        * gcc.dg/ifcvt-3.c: Modified to suppress forward propagation.
        * gcc.dg/tree-ssa/20030807-10.c: Likewise.
        * gcc.dg/pr94234-2.c: New test.

> ________________________________________
> From: Richard Biener <richard.guent...@gmail.com>
> Sent: Monday, June 15, 2020 3:41 PM
> To: Feng Xue OS
> Cc: gcc-patches@gcc.gnu.org; Marc Glisse
> Subject: Re: [PATCH] Add pattern for pointer-diff on addresses with same 
> base/offset (PR 94234)
> 
> On Fri, Jun 5, 2020 at 11:20 AM Feng Xue OS <f...@os.amperecomputing.com> 
> wrote:
>>
>>  As Marc suggested, removed the new pointer_diff rule, and add another rule 
>> to fold
>>  convert-add expression. This new rule is:
>> 
>>    (T)(A * C) +- (T)(B * C) -> (T) ((A +- B) * C)
>> 
>>  Regards,
>>  Feng
>> 
>>  ---
>> 2020-06-01  Feng Xue  <f...@os.amperecomputing.com>
>> 
>>  gcc/
>>          PR tree-optimization/94234
>>          * match.pd ((T)(A * C) +- (T)(B * C)) -> (T)((A +- B) * C): New
>>          simplification.
>>          * ((PTR_A + OFF) - (PTR_B + OFF)) -> (PTR_A - PTR_B): New
>>          simplification.
>> 
>>  gcc/testsuite/
>>          PR tree-optimization/94234
>>          * gcc.dg/pr94234.c: New test.
>>  ---
>>   gcc/match.pd                   | 28 ++++++++++++++++++++++++++++
>>   gcc/testsuite/gcc.dg/pr94234.c | 24 ++++++++++++++++++++++++
>>   2 files changed, 52 insertions(+)
>>   create mode 100644 gcc/testsuite/gcc.dg/pr94234.c
>> 
>>  diff --git a/gcc/match.pd b/gcc/match.pd
>>  index 33ee1a920bf..4f340bfe40a 100644
>>  --- a/gcc/match.pd
>>  +++ b/gcc/match.pd
>>  @@ -2515,6 +2515,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>               && TREE_CODE (@2) == INTEGER_CST
>>               && tree_int_cst_sign_bit (@2) == 0))
>>        (minus (convert @1) (convert @2)))))
>>  +   (simplify
>>  +    (pointer_diff (pointer_plus @0 @2) (pointer_plus @1 @2))
>>  +     (pointer_diff @0 @1))
> 
> This new pattern is OK.  Please commit it separately.
> 
>>      (simplify
>>       (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
>>       /* The second argument of pointer_plus must be interpreted as signed, 
>> and
>>  @@ -2526,6 +2529,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>        (minus (convert (view_convert:stype @1))
>>              (convert (view_convert:stype @2)))))))
>> 
>>  +/* (T)(A * C) +- (T)(B * C) -> (T)((A +- B) * C) and
>>  +   (T)(A * C) +- (T)(A) -> (T)(A * (C +- 1)). */
>>  +(if (INTEGRAL_TYPE_P (type))
>>  + (for plusminus (plus minus)
>>  +  (simplify
>>  +   (plusminus (convert:s (mult:cs @0 @1)) (convert:s (mult:cs @0 @2)))
>>  +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
>>  +       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
>>  +       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>>  +    (convert (mult (plusminus @1 @2) @0))))
>>  +  (simplify
>>  +   (plusminus (convert @0) (convert@2 (mult:c@3 @0 @1)))
>>  +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
>>  +       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
>>  +       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
>>  +       && single_use (@2) && single_use (@3))
>>  +    (convert (mult (plusminus { build_one_cst (TREE_TYPE (@1)); } @1) 
>> @0))))
>>  +  (simplify
>>  +   (plusminus (convert@2 (mult:c@3 @0 @1)) (convert @0))
>>  +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
>>  +       && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
>>  +       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
>>  +       && single_use (@2) && single_use (@3))
>>  +    (convert (mult (plusminus @1 { build_one_cst (TREE_TYPE (@1)); }) 
>> @0))))))
>>  +
> 
> This shows the limit of pattern matching IMHO.  I'm also not convinced
> it gets the
> overflow cases correct (but I didn't spend too much time here).  Note we have
> similar functionality implemented in fold_plusminus_mult_expr.  IMHO instead
> of doing the above moving fold_plusminus_mult_expr to GIMPLE by executing
> it from inside the forwprop pass would make more sense.  Or finally biting the
> bullet and try to teach reassociation about how to handle signed arithmetic
> with non-wrapping overflow behavior.
> 
> Richard.

From 68bba2edb714f741ef6e7f4a7814869cb99e938c Mon Sep 17 00:00:00 2001
From: Feng Xue <f...@os.amperecomputing.com>
Date: Mon, 17 Aug 2020 23:00:35 +0800
Subject: [PATCH] Simplify binary-with-convert expression in forwprop pass (PR
 94234)

For expression as ((T) X) OP ((T) Y), try to transform it to (T) (X OP Y),
and apply simplification on (X OP Y) if possible. In this way, we can avoid
creating nearly duplicated rule to handle binary-with-convert variant.

2020-08-19  Feng Xue  <f...@os.amperecomputing.com>

gcc/
        PR tree-optimization/94234
        * tree-ssa-forwprop.c (simplify_binary_with_convert): New function.
        * (fwprop_ssa_val): Move it before its new caller.

gcc/testsuite/
        PR tree-optimization/94234
        * gcc.dg/ifcvt-3.c: Modified to suppress forward propagation.
        * gcc.dg/tree-ssa/20030807-10.c: Likewise.
        * gcc.dg/pr94234-2.c: New test.
---
 gcc/testsuite/gcc.dg/ifcvt-3.c              |   2 +-
 gcc/testsuite/gcc.dg/pr94234-2.c            |  42 +++++
 gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c |   2 +-
 gcc/tree-ssa-forwprop.c                     | 166 +++++++++++++++++---
 4 files changed, 191 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr94234-2.c

diff --git a/gcc/testsuite/gcc.dg/ifcvt-3.c b/gcc/testsuite/gcc.dg/ifcvt-3.c
index b250bc15e08..56fdd753a0a 100644
--- a/gcc/testsuite/gcc.dg/ifcvt-3.c
+++ b/gcc/testsuite/gcc.dg/ifcvt-3.c
@@ -11,7 +11,7 @@ foo (s64 a, s64 b, s64 c)
   if (d == 0)
     return a + c;
   else
-    return b + d + c;
+    return b + c + d;
 }
 
 /* This test can be reduced to just return a + c;  */
diff --git a/gcc/testsuite/gcc.dg/pr94234-2.c b/gcc/testsuite/gcc.dg/pr94234-2.c
new file mode 100644
index 00000000000..d520b926adb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94234-2.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop2" } */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+ptrdiff_t foo1 (char *a, size_t n)
+{
+  char *b1 = a + 8 * n;
+  char *b2 = a + 8 * (n - 1);
+
+  return b1 - b2;
+}
+
+int use_ptr (char *a, char *b);
+
+ptrdiff_t foo2 (char *a, size_t n)
+{
+  char *b1 = a + 8 * (n - 1);
+  char *b2 = a + 8 * n;
+
+  use_ptr (b1, b2);
+
+  return b1 - b2;
+}
+
+int use_int (unsigned i, int j);
+
+int goo (int m_param, int n_param)
+{
+  int a = m_param + n_param;
+  int b = -n_param;
+  unsigned r = (unsigned)(a & b) + (unsigned)(a | b); /* (unsigned) (a + b) */
+
+  use_int (r, b); 
+
+  return (int)r;
+}
+
+/* { dg-final { scan-tree-dump-times "return 8;" 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "return -8;" 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "return m_param" 1 "forwprop2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c b/gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c
index 0903f3c4321..0e01e511b78 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20030807-10.c
@@ -7,7 +7,7 @@ unsigned int
 subreg_highpart_offset (outermode, innermode)
      int outermode, innermode;
 {
-  unsigned int offset = 0;
+  unsigned int offset = 1;
   int difference = (mode_size[innermode] - mode_size[outermode]);
   if (difference > 0)
     {
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index a4aed3ccade..b7bb1d2aed6 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -338,6 +338,25 @@ remove_prop_source_from_use (tree name)
   return cfg_changed;
 }
 
+/* Primitive "lattice" function for gimple_simplify.  */
+
+static tree
+fwprop_ssa_val (tree name)
+{
+  /* First valueize NAME.  */
+  if (TREE_CODE (name) == SSA_NAME
+      && SSA_NAME_VERSION (name) < lattice.length ())
+    {
+      tree val = lattice[SSA_NAME_VERSION (name)];
+      if (val)
+	name = val;
+    }
+  /* We continue matching along SSA use-def edges for SSA names
+     that are not single-use.  Currently there are no patterns
+     that would cause any issues with that.  */
+  return name;
+}
+
 /* Return the rhs of a gassign *STMT in a form of a single tree,
    converted to type TYPE.
 
@@ -1821,6 +1840,131 @@ simplify_rotate (gimple_stmt_iterator *gsi)
   return true;
 }
 
+/* Given ((T) X) OP ((T) Y), if it can be transformed to (T) (X OP Y) in
+   terms of two's complement computation, apply simplification on (X OP Y)
+   if it is possible.  As a prerequisite, outer result type (T) has precision
+   not more than that of inner operand type.  */
+
+static bool
+simplify_binary_with_convert (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+  tree lhs = gimple_assign_lhs (stmt);
+  tree rtype = TREE_TYPE (lhs);
+  tree ctype = NULL_TREE;
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  bool check_overflow = false;
+
+  if (!INTEGRAL_TYPE_P (rtype))
+    return false;
+
+  switch (code)
+    {
+    case MULT_EXPR:
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+      /* Skip arithemtic operations that need to preserve overflow for
+	 trapping or sanitization.  */
+      if (TYPE_OVERFLOW_TRAPS (rtype) || TYPE_OVERFLOW_SANITIZED (rtype))
+	return false;
+
+      check_overflow = true;
+      break;
+
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+    case BIT_AND_EXPR:
+      break;
+
+    default:
+      return false;
+    }
+
+  tree arg[2] = { gimple_assign_rhs1 (stmt), gimple_assign_rhs2 (stmt) };
+  bool any_single_use = false;
+
+  for (int i = 0; i < 2; i++)
+    {
+      if (TREE_CODE (arg[i]) == SSA_NAME)
+	{
+	  tree def_arg;
+	  enum tree_code def_code;
+
+	  defcodefor_name (arg[i], &def_code, &def_arg, NULL);
+
+	  if (!CONVERT_EXPR_CODE_P (def_code))
+	    return false;
+
+	  if (!ctype)
+	    {
+	      ctype = TREE_TYPE (def_arg);
+
+	      if (!INTEGRAL_TYPE_P (ctype)
+		  || TYPE_PRECISION (ctype) < TYPE_PRECISION (rtype))
+		return false;
+
+	      if (i)
+		{
+		  /* The other operand should be a constant, convert it to
+		     inner operand type. */
+		  arg[0] = wide_int_to_tree (ctype, wi::to_wide (arg[0]));
+		}
+	    }
+	  else if (!types_compatible_p (ctype, TREE_TYPE (def_arg)))
+	    return false;
+
+	  if (has_single_use (arg[i]))
+	    any_single_use = true;
+
+	  arg[i] = def_arg;
+	}
+      else if (TREE_CODE (arg[i]) == INTEGER_CST)
+	{
+	  if (!ctype)
+	    return false;
+
+	  /* This operand is a constant, convert it to inner operand type. */
+	  arg[i] = wide_int_to_tree (ctype, wi::to_wide (arg[i]));
+	}
+      else
+	return false;
+    }
+
+  gimple_seq seq = NULL;
+  gimple_seq *pseq = any_single_use ? &seq : NULL;
+  gimple *g;
+  tree val;
+
+  if (check_overflow && !TYPE_OVERFLOW_WRAPS (ctype)
+      && (TYPE_SIGN (ctype) != TYPE_SIGN (rtype)))
+    {
+      /* Outer result type is unsigned, while inner operand type is signed.
+	 Simplification on (X OP Y) is allowed if it does not introduce any
+	 new arithmetic operations which could overflow (not plus/minus/mul
+	 /neg/abs).  Here we only consider the simplest case, in which no new
+	 operation is generated, and final result is a constant or existing
+	 SSA value.  */
+      pseq = NULL;
+    }
+
+  if (!(val = gimple_simplify (code, ctype, arg[0], arg[1], pseq,
+			       fwprop_ssa_val)))
+    return false;
+
+  if (TREE_CODE (val) == INTEGER_CST)
+    g = gimple_build_assign (lhs, NOP_EXPR,
+			     wide_int_to_tree (rtype, wi::to_wide (val)));
+  else if (TREE_CODE (val) == SSA_NAME)
+    g = gimple_build_assign (lhs, NOP_EXPR, val);
+  else
+    return false;
+
+  if (seq)
+    gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+  gsi_replace (gsi, g, false);
+  return true;
+}
 
 /* Check whether an array contains a valid ctz table.  */
 static bool
@@ -2640,25 +2784,6 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 }
 
 
-/* Primitive "lattice" function for gimple_simplify.  */
-
-static tree
-fwprop_ssa_val (tree name)
-{
-  /* First valueize NAME.  */
-  if (TREE_CODE (name) == SSA_NAME
-      && SSA_NAME_VERSION (name) < lattice.length ())
-    {
-      tree val = lattice[SSA_NAME_VERSION (name)];
-      if (val)
-	name = val;
-    }
-  /* We continue matching along SSA use-def edges for SSA names
-     that are not single-use.  Currently there are no patterns
-     that would cause any issues with that.  */
-  return name;
-}
-
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
@@ -3154,6 +3279,9 @@ pass_forwprop::execute (function *fun)
 			      || code == BIT_XOR_EXPR)
 			     && simplify_rotate (&gsi))
 		      changed = true;
+		    else if (TREE_CODE_CLASS (code) == tcc_binary
+			     && simplify_binary_with_convert (&gsi))
+		      changed = true;
 		    else if (code == VEC_PERM_EXPR)
 		      {
 			int did_something = simplify_permutation (&gsi);
-- 
2.17.1

Reply via email to