Hi! The following patch generalizes the x ? 1 : 0 -> (int) x optimization to handle also left shifts by constant.
During x86_64-linux and i686-linux bootstraps + regtests it triggered in 1514 unique non-LTO -m64 cases (sort -u on log mentioning filename, function name and shift count) and 1866 -m32 cases. Unfortunately, the patch regresses: +FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0 +FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "optimized: basic block" 1 +FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1 and in both cases it actually results in worse code. In ssa-ccp-11.c since phiopt2 it results in smaller IL due to the optimization, e.g. - if (_1 != 0) - goto <bb 5>; [21.72%] - else - goto <bb 6>; [78.28%] - - <bb 5> [local count: 233216728]: - - <bb 6> [local count: 1073741824]: - # _4 = PHI <2(5), 0(4)> - return _4; + _7 = (int) _1; + _8 = _7 << 1; + return _8; but dom2 actually manages to optimize it only without this optimization: - # a_7 = PHI <0(3), 1(2)> - # b_8 = PHI <1(3), 0(2)> - _9 = a_7 & b_8; - return 0; + # a_2 = PHI <1(2), 0(3)> + # b_3 = PHI <0(2), 1(3)> + _1 = a_2 & b_3; + _7 = (int) _1; + _8 = _7 << 1; + return _8; We'd need some optimization that would go through all PHI edges and compute if some use of the phi results don't actually compute a constant across all the PHI edges - 1 & 0 and 0 & 1 is always 0. Similarly in the other function + # a_1 = PHI <3(2), 2(3)> + # b_2 = PHI <2(2), 3(3)> + c_5 = a_1 + b_2; is always c_5 = 5; Similarly, in the slp vectorization test there is: a[0] = b[0] ? 1 : 7; a[1] = b[1] ? 2 : 0; a[2] = b[2] ? 3 : 0; a[3] = b[3] ? 4 : 0; a[4] = b[4] ? 5 : 0; a[5] = b[5] ? 6 : 0; a[6] = b[6] ? 7 : 0; a[7] = b[7] ? 8 : 0; and obviously if the ? 2 : 0 and ? 4 : 0 and ? 8 : 0 are optimized into shifts, it doesn't match anymore. So, I wonder if we e.g. shouldn't perform this optimization only in the last phiopt pass (i.e. change the bool early argument to int late where it would be 0 (early), 1 (late) and 2 (very late) and perform this only if very late. Thoughts on this? 2020-11-03 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/97690 * tree-ssa-phiopt.c (conditional_replacement): Also optimize cond ? pow2p_cst : 0 as ((type) cond) << cst. * gcc.dg/tree-ssa/phi-opt-22.c: New test. --- gcc/tree-ssa-phiopt.c.jj 2020-10-22 09:36:25.602484491 +0200 +++ gcc/tree-ssa-phiopt.c 2020-11-03 17:59:18.133662581 +0100 @@ -752,7 +752,9 @@ conditional_replacement (basic_block con gimple_stmt_iterator gsi; edge true_edge, false_edge; tree new_var, new_var2; - bool neg; + bool neg = false; + int shift = 0; + tree nonzero_arg; /* FIXME: Gimplification of complex type is too hard for now. */ /* We aren't prepared to handle vectors either (and it is a question @@ -763,14 +765,22 @@ conditional_replacement (basic_block con || POINTER_TYPE_P (TREE_TYPE (arg1)))) return false; - /* The PHI arguments have the constants 0 and 1, or 0 and -1, then - convert it to the conditional. */ - if ((integer_zerop (arg0) && integer_onep (arg1)) - || (integer_zerop (arg1) && integer_onep (arg0))) - neg = false; - else if ((integer_zerop (arg0) && integer_all_onesp (arg1)) - || (integer_zerop (arg1) && integer_all_onesp (arg0))) + /* The PHI arguments have the constants 0 and 1, or 0 and -1 or + 0 and (1 << cst), then convert it to the conditional. */ + if (integer_zerop (arg0)) + nonzero_arg = arg1; + else if (integer_zerop (arg1)) + nonzero_arg = arg0; + else + return false; + if (integer_all_onesp (nonzero_arg)) neg = true; + else if (integer_pow2p (nonzero_arg)) + { + shift = tree_log2 (nonzero_arg); + if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg))) + return false; + } else return false; @@ -782,12 +792,12 @@ conditional_replacement (basic_block con falls through into BB. There is a single PHI node at the join point (BB) and its arguments - are constants (0, 1) or (0, -1). + are constants (0, 1) or (0, -1) or (0, (1 << shift)). So, given the condition COND, and the two PHI arguments, we can rewrite this PHI into non-branching code: - dest = (COND) or dest = COND' + dest = (COND) or dest = COND' or dest = (COND) << shift We use the condition as-is if the argument associated with the true edge has the value one or the argument associated with the @@ -822,6 +832,14 @@ conditional_replacement (basic_block con cond = fold_build1_loc (gimple_location (stmt), NEGATE_EXPR, TREE_TYPE (cond), cond); } + else if (shift) + { + cond = fold_convert_loc (gimple_location (stmt), + TREE_TYPE (result), cond); + cond = fold_build2_loc (gimple_location (stmt), + LSHIFT_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, shift)); + } /* Insert our new statements at the end of conditional block before the COND_STMT. */ --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c.jj 2020-11-03 18:22:19.756124543 +0100 +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c 2020-11-03 18:25:12.795176885 +0100 @@ -0,0 +1,11 @@ +/* PR tree-optimization/97690 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-phiopt2" } */ + +int foo (_Bool d) { return d ? 2 : 0; } +int bar (_Bool d) { return d ? 1 : 0; } +int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; } +int qux (_Bool d) { return d ? 1024 : 0; } + +/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */ +/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */ Jakub