Hi!

We don't try to optimize, for signed x and y, (int) (x - 1U) * y + y
into x * y; we can't do that with signed x * y, because the former
is well defined for INT_MIN and -1, while the latter is not.
We could perhaps optimize it during isel or some very late optimization
where we'd magically turn on flag_wrapv, but we don't do that yet.

This patch optimizes it in simplify-rtx.c, such that we can optimize
it during combine.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-12-17  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/98334
        * simplify-rtx.c (simplify_context::simplify_binary_operation_1):
        Optimize (X - 1) * Y + Y to X * Y or (X + 1) * Y - Y to X * Y.

        * gcc.target/i386/pr98334.c: New test.

--- gcc/simplify-rtx.c.jj       2020-12-17 02:29:28.661558283 +0100
+++ gcc/simplify-rtx.c  2020-12-17 11:49:25.419871360 +0100
@@ -2602,6 +2602,42 @@ simplify_context::simplify_binary_operat
              return (set_src_cost (tem, int_mode, speed)
                      <= set_src_cost (orig, int_mode, speed) ? tem : 0);
            }
+
+         /* Optimize (X - 1) * Y + Y to X * Y.  */
+         lhs = op0;
+         rhs = op1;
+         if (GET_CODE (op0) == MULT)
+           {
+             if (((GET_CODE (XEXP (op0, 0)) == PLUS
+                   && XEXP (XEXP (op0, 0), 1) == constm1_rtx)
+                  || (GET_CODE (XEXP (op0, 0)) == MINUS
+                      && XEXP (XEXP (op0, 0), 1) == const1_rtx))
+                 && rtx_equal_p (XEXP (op0, 1), op1))
+               lhs = XEXP (XEXP (op0, 0), 0);
+             else if (((GET_CODE (XEXP (op0, 1)) == PLUS
+                        && XEXP (XEXP (op0, 1), 1) == constm1_rtx)
+                       || (GET_CODE (XEXP (op0, 1)) == MINUS
+                           && XEXP (XEXP (op0, 1), 1) == const1_rtx))
+                      && rtx_equal_p (XEXP (op0, 0), op1))
+               lhs = XEXP (XEXP (op0, 1), 0);
+           }
+         else if (GET_CODE (op1) == MULT)
+           {
+             if (((GET_CODE (XEXP (op1, 0)) == PLUS
+                   && XEXP (XEXP (op1, 0), 1) == constm1_rtx)
+                  || (GET_CODE (XEXP (op1, 0)) == MINUS
+                      && XEXP (XEXP (op1, 0), 1) == const1_rtx))
+                 && rtx_equal_p (XEXP (op1, 1), op0))
+               rhs = XEXP (XEXP (op1, 0), 0);
+             else if (((GET_CODE (XEXP (op1, 1)) == PLUS
+                        && XEXP (XEXP (op1, 1), 1) == constm1_rtx)
+                       || (GET_CODE (XEXP (op1, 1)) == MINUS
+                           && XEXP (XEXP (op1, 1), 1) == const1_rtx))
+                      && rtx_equal_p (XEXP (op1, 0), op0))
+               rhs = XEXP (XEXP (op1, 1), 0);
+           }
+         if (lhs != op0 || rhs != op1)
+           return simplify_gen_binary (MULT, int_mode, lhs, rhs);
        }
 
       /* (plus (xor X C1) C2) is (xor X (C1^C2)) if C2 is signbit.  */
@@ -2789,6 +2825,26 @@ simplify_context::simplify_binary_operat
              return (set_src_cost (tem, int_mode, speed)
                      <= set_src_cost (orig, int_mode, speed) ? tem : 0);
            }
+
+         /* Optimize (X + 1) * Y - Y to X * Y.  */
+         lhs = op0;
+         if (GET_CODE (op0) == MULT)
+           {
+             if (((GET_CODE (XEXP (op0, 0)) == PLUS
+                   && XEXP (XEXP (op0, 0), 1) == const1_rtx)
+                  || (GET_CODE (XEXP (op0, 0)) == MINUS
+                      && XEXP (XEXP (op0, 0), 1) == constm1_rtx))
+                 && rtx_equal_p (XEXP (op0, 1), op1))
+               lhs = XEXP (XEXP (op0, 0), 0);
+             else if (((GET_CODE (XEXP (op0, 1)) == PLUS
+                        && XEXP (XEXP (op0, 1), 1) == const1_rtx)
+                       || (GET_CODE (XEXP (op0, 1)) == MINUS
+                           && XEXP (XEXP (op0, 1), 1) == constm1_rtx))
+                      && rtx_equal_p (XEXP (op0, 0), op1))
+               lhs = XEXP (XEXP (op0, 1), 0);
+           }
+         if (lhs != op0)
+           return simplify_gen_binary (MULT, int_mode, lhs, op1);
        }
 
       /* (a - (-b)) -> (a + b).  True even for IEEE.  */
--- gcc/testsuite/gcc.target/i386/pr98334.c.jj  2020-12-17 11:56:10.781323162 +0100
+++ gcc/testsuite/gcc.target/i386/pr98334.c     2020-12-17 11:56:56.098814695 +0100
@@ -0,0 +1,36 @@
+/* PR rtl-optimization/98334 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -fno-stack-protector" } */
+/* { dg-final { scan-assembler-not "\taddl\t" } } */
+/* { dg-final { scan-assembler-not "\tsubl\t" } } */
+/* { dg-final { scan-assembler-not "\tleal\t" } } */
+
+int
+foo (int i, unsigned int n)
+{
+  int result = 0;
+  while (n > 0)
+    {
+      result += i;
+      n -= 1;
+    }
+  return result;
+}
+
+int
+bar (int x, int y)
+{
+  return (int) (y - 1U) * x + x;
+}
+
+int
+baz (int x, int y)
+{
+  return (y - 1) * x + x;
+}
+
+int
+qux (int x, int y)
+{
+  return x * (int) (y + 1U) - x;
+}

        Jakub

Reply via email to