On December 12, 2020 9:01:50 AM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >The following patch recognizes another form of hand written >__builtin_add_overflow (this time _p), in particular when >the code does unsigned >if (x > ~0U - y) >or >if (x <= ~0U - y) >it can be optimized (if the subtraction turned into ~y is single use) >into >if (__builtin_add_overflow_p (x, y, 0U)) >or >if (!__builtin_add_overflow_p (x, y, 0U)) >and generate better code, e.g. for the first function in the testcase: >- movl %esi, %eax > addl %edi, %esi >- notl %eax >- cmpl %edi, %eax >- movl $-1, %eax >- cmovnb %esi, %eax >+ jc .L3 >+ movl %esi, %eax >+ ret >+.L3: >+ orl $-1, %eax > ret >on x86_64. As for the jumps vs. conditional move case, that is some CE >issue with complex branch patterns we should fix up no matter what, but >in this case I'm actually not sure if branchy code isn't better, >overflow >is something that isn't that common. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok. Richard. >2020-12-12 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/96272 > * tree-ssa-math-opts.c (uaddsub_overflow_check_p): Add OTHER argument. > Handle BIT_NOT_EXPR. > (match_uaddsub_overflow): Optimize unsigned a > ~b into > __imag__ .ADD_OVERFLOW (a, b). > (math_opts_dom_walker::after_dom_children): Call >match_uaddsub_overflow > even for BIT_NOT_EXPR. > > * gcc.dg/tree-ssa/pr96272.c: New test. > >--- gcc/tree-ssa-math-opts.c.jj 2020-11-22 19:16:25.302440812 +0100 >+++ gcc/tree-ssa-math-opts.c 2020-12-11 16:19:17.314846597 +0100 >@@ -3457,7 +3457,8 @@ convert_mult_to_fma (gimple *mul_stmt, t > and 0 otherwise. */ > > static int >-uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval) >+uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval, >+ tree *other) > { > enum tree_code ccode = ERROR_MARK; > tree crhs1 = NULL_TREE, crhs2 = NULL_TREE; >@@ -3520,6 +3521,13 @@ uaddsub_overflow_check_p (gimple *stmt, > || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2) > && crhs2 == lhs)) > return ccode == GT_EXPR ? 1 : -1; >+ /* r = ~a; b > r or b <= r. */ >+ if (code == BIT_NOT_EXPR && crhs2 == lhs) >+ { >+ if (other) >+ *other = crhs1; >+ return ccode == GT_EXPR ? 1 : -1; >+ } > break; > case LT_EXPR: > case GE_EXPR: >@@ -3531,6 +3539,13 @@ uaddsub_overflow_check_p (gimple *stmt, > || (code == PLUS_EXPR && crhs1 == lhs > && (crhs2 == rhs1 || crhs2 == rhs2))) > return ccode == LT_EXPR ? 1 : -1; >+ /* r = ~a; r < b or r >= b. */ >+ if (code == BIT_NOT_EXPR && crhs1 == lhs) >+ { >+ if (other) >+ *other = crhs2; >+ return ccode == LT_EXPR ? 1 : -1; >+ } > break; > default: > break; >@@ -3560,7 +3575,15 @@ uaddsub_overflow_check_p (gimple *stmt, > _9 = REALPART_EXPR <_7>; > _8 = IMAGPART_EXPR <_8>; > if (_8) >- and replace (utype) x with _9. */ >+ and replace (utype) x with _9. >+ >+ Also recognize: >+ x = ~z; >+ if (y > x) >+ and replace it with >+ _7 = ADD_OVERFLOW (y, z); >+ _8 = IMAGPART_EXPR <_8>; >+ if (_8) */ > > static bool > match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, >@@ -3576,34 +3599,49 @@ match_uaddsub_overflow (gimple_stmt_iter > gimple *add_stmt = NULL; > bool add_first = false; > >- gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR); >+ gcc_checking_assert (code == PLUS_EXPR >+ || code == MINUS_EXPR >+ || code == BIT_NOT_EXPR); > if (!INTEGRAL_TYPE_P (type) > || !TYPE_UNSIGNED (type) > || has_zero_uses (lhs) >- || (code == MINUS_EXPR >- && optab_handler (usubv4_optab, >+ || (code != PLUS_EXPR >+ && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab, > TYPE_MODE (type)) == CODE_FOR_nothing)) > return false; > >+ tree rhs1 = gimple_assign_rhs1 (stmt); >+ tree rhs2 = gimple_assign_rhs2 (stmt); > FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) > { > use_stmt = USE_STMT (use_p); > if (is_gimple_debug (use_stmt)) > continue; > >- if (uaddsub_overflow_check_p (stmt, use_stmt, NULL_TREE)) >- ovf_use_seen = true; >+ tree other = NULL_TREE; >+ if (uaddsub_overflow_check_p (stmt, use_stmt, NULL_TREE, >&other)) >+ { >+ if (code == BIT_NOT_EXPR) >+ { >+ gcc_assert (other); >+ if (TREE_CODE (other) != SSA_NAME) >+ return false; >+ if (rhs2 == NULL) >+ rhs2 = other; >+ else if (rhs2 != other) >+ return false; >+ } >+ ovf_use_seen = true; >+ } > else > use_seen = true; > if (ovf_use_seen && use_seen) > break; > } > >- tree rhs1 = gimple_assign_rhs1 (stmt); >- tree rhs2 = gimple_assign_rhs2 (stmt); > tree maxval = NULL_TREE; > if (!ovf_use_seen >- || !use_seen >+ || (code == BIT_NOT_EXPR ? use_seen : !use_seen) > || (code == PLUS_EXPR > && optab_handler (uaddv4_optab, > TYPE_MODE (type)) == CODE_FOR_nothing)) >@@ -3664,7 +3702,7 @@ match_uaddsub_overflow (gimple_stmt_iter > if (is_gimple_debug (use_stmt)) > continue; > >- if (uaddsub_overflow_check_p (stmt, use_stmt, maxval)) >+ if (uaddsub_overflow_check_p (stmt, use_stmt, maxval, NULL)) > { > ovf_use_seen = true; > use_bb = gimple_bb (use_stmt); >@@ -3781,23 +3819,27 @@ match_uaddsub_overflow (gimple_stmt_iter > } > > tree ctype = build_complex_type (type); >- gcall *g = gimple_build_call_internal (code == PLUS_EXPR >+ gcall *g = gimple_build_call_internal (code != MINUS_EXPR > ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW, > 2, rhs1, rhs2); > tree ctmp = make_ssa_name (ctype); > gimple_call_set_lhs (g, ctmp); > gsi_insert_before (gsi, g, GSI_SAME_STMT); > tree new_lhs = maxval ? make_ssa_name (type) : lhs; >- gassign *g2 = gimple_build_assign (new_lhs, REALPART_EXPR, >- build1 (REALPART_EXPR, type, ctmp)); >- if (maxval) >+ gassign *g2; >+ if (code != BIT_NOT_EXPR) > { >- gsi_insert_before (gsi, g2, GSI_SAME_STMT); >- if (add_first) >- *gsi = gsi_for_stmt (stmt); >+ g2 = gimple_build_assign (new_lhs, REALPART_EXPR, >+ build1 (REALPART_EXPR, type, ctmp)); >+ if (maxval) >+ { >+ gsi_insert_before (gsi, g2, GSI_SAME_STMT); >+ if (add_first) >+ *gsi = gsi_for_stmt (stmt); >+ } >+ else >+ gsi_replace (gsi, g2, true); > } >- else >- gsi_replace (gsi, g2, true); > tree ovf = make_ssa_name (type); > g2 = gimple_build_assign (ovf, IMAGPART_EXPR, > build1 (IMAGPART_EXPR, type, ctmp)); >@@ -3808,9 +3850,10 @@ match_uaddsub_overflow (gimple_stmt_iter > if (is_gimple_debug (use_stmt)) > continue; > >- int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt, maxval); >+ int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt, maxval, >NULL); > if (ovf_use == 0) > { >+ gcc_assert (code != BIT_NOT_EXPR); > if (maxval) > { > tree use_lhs = gimple_assign_lhs (use_stmt); >@@ -3863,6 +3906,12 @@ match_uaddsub_overflow (gimple_stmt_iter > gsi_replace (&gsi2, g, true); > } > } >+ else if (code == BIT_NOT_EXPR) >+ { >+ gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); >+ gsi_remove (&gsi2, true); >+ release_ssa_name (lhs); >+ } > return true; > } > >@@ -4188,6 +4237,10 @@ math_opts_dom_walker::after_dom_children > match_uaddsub_overflow (&gsi, stmt, code); > break; > >+ case BIT_NOT_EXPR: >+ match_uaddsub_overflow (&gsi, stmt, code); >+ break; >+ > case TRUNC_MOD_EXPR: > convert_to_divmod (as_a<gassign *> (stmt)); > break; >--- gcc/testsuite/gcc.dg/tree-ssa/pr96272.c.jj 2020-12-11 >16:30:18.498463346 +0100 >+++ gcc/testsuite/gcc.dg/tree-ssa/pr96272.c 2020-12-11 >16:30:06.344599075 +0100 >@@ -0,0 +1,37 @@ >+/* PR tree-optimization/96272 */ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-widening_mul" } */ >+ >+unsigned >+foo (unsigned a, unsigned b) >+{ >+ if (a > ~0U - b) >+ return ~0U; >+ return a + b; >+} >+ >+unsigned >+bar (unsigned a, unsigned b) >+{ >+ if (a <= ~0U - b) >+ return ~0U; >+ return a + b; >+} >+ >+unsigned >+baz (unsigned a, unsigned b) >+{ >+ if (~0U - b < a) >+ return ~0U; >+ return a + b; >+} >+ >+unsigned >+qux (unsigned a, unsigned b) >+{ >+ if (~0U - b >= a) >+ return ~0U; >+ return a + b; >+} >+ >+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 4 "widening_mul" { >target { i?86-*-* x86_64-*-* } } } } */ > > Jakub