Hi!

For signed x and y we don't try to optimize (int) (x - 1U) * y + y into
x * y; we can't express that as a signed x * y, because the former is well
defined for x == INT_MIN and y == -1, while the latter is not.  We could
perhaps optimize it during isel or in some very late optimization pass
where we'd magically turn on flag_wrapv, but we don't do that yet.
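For illustration only (not part of the patch), here is a minimal standalone
C program showing the problematic input; it assumes the usual
two's-complement int of the targets tested below.  With x == INT_MIN and
y == -1 the original expression is fully defined and yields INT_MIN,
whereas plain x * y would be signed overflow, so the rewrite is only valid
where signed arithmetic is known to wrap:

/* Illustrative only, not part of the patch.  Assumes two's-complement int.  */
#include <limits.h>
#include <stdio.h>

int
main (void)
{
  int x = INT_MIN, y = -1;
  /* x - 1U wraps in unsigned arithmetic to (unsigned) INT_MAX, so the
     cast back to int and the multiply-add stay in range:
     INT_MAX * -1 + -1 == INT_MIN, no signed overflow anywhere.  */
  int r = (int) (x - 1U) * y + y;
  printf ("%d\n", r);	/* prints INT_MIN */
  /* int bad = x * y;	   would be signed overflow, i.e. undefined */
  return 0;
}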
This patch optimizes it in simplify-rtx.c instead, such that we can
optimize it during combine.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-12-17  Jakub Jelinek  <ja...@redhat.com>

	PR rtl-optimization/98334
	* simplify-rtx.c (simplify_context::simplify_binary_operation_1):
	Optimize (X - 1) * Y + Y to X * Y or (X + 1) * Y - Y to X * Y.

	* gcc.target/i386/pr98334.c: New test.

--- gcc/simplify-rtx.c.jj	2020-12-17 02:29:28.661558283 +0100
+++ gcc/simplify-rtx.c	2020-12-17 11:49:25.419871360 +0100
@@ -2602,6 +2602,42 @@ simplify_context::simplify_binary_operat
 	    return (set_src_cost (tem, int_mode, speed)
 		    <= set_src_cost (orig, int_mode, speed) ? tem : 0);
 	  }
+
+	/* Optimize (X - 1) * Y + Y to X * Y.  */
+	lhs = op0;
+	rhs = op1;
+	if (GET_CODE (op0) == MULT)
+	  {
+	    if (((GET_CODE (XEXP (op0, 0)) == PLUS
+		  && XEXP (XEXP (op0, 0), 1) == constm1_rtx)
+		 || (GET_CODE (XEXP (op0, 0)) == MINUS
+		     && XEXP (XEXP (op0, 0), 1) == const1_rtx))
+		&& rtx_equal_p (XEXP (op0, 1), op1))
+	      lhs = XEXP (XEXP (op0, 0), 0);
+	    else if (((GET_CODE (XEXP (op0, 1)) == PLUS
+		       && XEXP (XEXP (op0, 1), 1) == constm1_rtx)
+		      || (GET_CODE (XEXP (op0, 1)) == MINUS
+			  && XEXP (XEXP (op0, 1), 1) == const1_rtx))
+		     && rtx_equal_p (XEXP (op0, 0), op1))
+	      lhs = XEXP (XEXP (op0, 1), 0);
+	  }
+	else if (GET_CODE (op1) == MULT)
+	  {
+	    if (((GET_CODE (XEXP (op1, 0)) == PLUS
+		  && XEXP (XEXP (op1, 0), 1) == constm1_rtx)
+		 || (GET_CODE (XEXP (op1, 0)) == MINUS
+		     && XEXP (XEXP (op1, 0), 1) == const1_rtx))
+		&& rtx_equal_p (XEXP (op1, 1), op0))
+	      rhs = XEXP (XEXP (op1, 0), 0);
+	    else if (((GET_CODE (XEXP (op1, 1)) == PLUS
+		       && XEXP (XEXP (op1, 1), 1) == constm1_rtx)
+		      || (GET_CODE (XEXP (op1, 1)) == MINUS
+			  && XEXP (XEXP (op1, 1), 1) == const1_rtx))
+		     && rtx_equal_p (XEXP (op1, 0), op0))
+	      rhs = XEXP (XEXP (op1, 1), 0);
+	  }
+	if (lhs != op0 || rhs != op1)
+	  return simplify_gen_binary (MULT, int_mode, lhs, rhs);
 	}
 
       /* (plus (xor X C1) C2) is (xor X (C1^C2)) if C2 is signbit.  */
@@ -2789,6 +2825,26 @@ simplify_context::simplify_binary_operat
 	    return (set_src_cost (tem, int_mode, speed)
 		    <= set_src_cost (orig, int_mode, speed) ? tem : 0);
 	  }
+
+	/* Optimize (X + 1) * Y - Y to X * Y.  */
+	lhs = op0;
+	if (GET_CODE (op0) == MULT)
+	  {
+	    if (((GET_CODE (XEXP (op0, 0)) == PLUS
+		  && XEXP (XEXP (op0, 0), 1) == const1_rtx)
+		 || (GET_CODE (XEXP (op0, 0)) == MINUS
+		     && XEXP (XEXP (op0, 0), 1) == constm1_rtx))
+		&& rtx_equal_p (XEXP (op0, 1), op1))
+	      lhs = XEXP (XEXP (op0, 0), 0);
+	    else if (((GET_CODE (XEXP (op0, 1)) == PLUS
+		       && XEXP (XEXP (op0, 1), 1) == const1_rtx)
+		      || (GET_CODE (XEXP (op0, 1)) == MINUS
+			  && XEXP (XEXP (op0, 1), 1) == constm1_rtx))
+		     && rtx_equal_p (XEXP (op0, 0), op1))
+	      lhs = XEXP (XEXP (op0, 1), 0);
+	  }
+	if (lhs != op0)
+	  return simplify_gen_binary (MULT, int_mode, lhs, op1);
 	}
 
       /* (a - (-b)) -> (a + b).  True even for IEEE.  */
--- gcc/testsuite/gcc.target/i386/pr98334.c.jj	2020-12-17 11:56:10.781323162 +0100
+++ gcc/testsuite/gcc.target/i386/pr98334.c	2020-12-17 11:56:56.098814695 +0100
@@ -0,0 +1,36 @@
+/* PR rtl-optimization/98334 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer -fno-stack-protector" } */
+/* { dg-final { scan-assembler-not "\taddl\t" } } */
+/* { dg-final { scan-assembler-not "\tsubl\t" } } */
+/* { dg-final { scan-assembler-not "\tleal\t" } } */
+
+int
+foo (int i, unsigned int n)
+{
+  int result = 0;
+  while (n > 0)
+    {
+      result += i;
+      n -= 1;
+    }
+  return result;
+}
+
+int
+bar (int x, int y)
+{
+  return (int) (y - 1U) * x + x;
+}
+
+int
+baz (int x, int y)
+{
+  return (y - 1) * x + x;
+}
+
+int
+qux (int x, int y)
+{
+  return x * (int) (y + 1U) - x;
+}

	Jakub