On Thu, Jul 7, 2022 at 6:45 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > When memchr is applied on a constant string of no more than the bytes of > a word, simplify memchr by checking each byte in the constant string. > > int f (int a) > { > return __builtin_memchr ("AE", a, 2) != 0; > } > > is simplified to > > int f (int a) > { > return ((char) a == 'A' || (char) a == 'E') != 0; > } > > gcc/ > > PR tree-optimization/103798 > * tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h". > (simplify_builtin_call): Inline memchr with constant strings of > no more than the bytes of a word. > * tree-ssa-strlen.cc (use_in_zero_equality): Make it global. > * tree-ssa-strlen.h (use_in_zero_equality): New. > > gcc/testsuite/ > > PR tree-optimization/103798 > * c-c++-common/pr103798-1.c: New test. > * c-c++-common/pr103798-2.c: Likewise. > * c-c++-common/pr103798-3.c: Likewise. > * c-c++-common/pr103798-4.c: Likewise. > * c-c++-common/pr103798-5.c: Likewise. > * c-c++-common/pr103798-6.c: Likewise. > * c-c++-common/pr103798-7.c: Likewise. > * c-c++-common/pr103798-8.c: Likewise. > --- > gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-2.c | 30 ++++++++++++ > gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-5.c | 26 ++++++++++ > gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++++++++++ > gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++++++++++ > gcc/tree-ssa-forwprop.cc | 64 +++++++++++++++++++++++++ > gcc/tree-ssa-strlen.cc | 4 +- > gcc/tree-ssa-strlen.h | 2 + > 11 files changed, 289 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c > create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c > b/gcc/testsuite/c-c++-common/pr103798-1.c > new file mode 100644 > index 00000000000..cd3edf569fc > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int > +f (char a) > +{ > + return __builtin_memchr ("a", a, 1) == 0; > +} > + > +__attribute__ ((weak)) > +int > +g (char a) > +{ > + return a != 'a'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c > b/gcc/testsuite/c-c++-common/pr103798-2.c > new file mode 100644 > index 00000000000..e7e99c3679e > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c > @@ -0,0 +1,30 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +#include <string.h> > + > +__attribute__ ((weak)) > +int > +f (int a) > +{ > + return memchr ("aE", a, 2) != NULL; > +} > + > +__attribute__ ((weak)) > +int > +g (char a) > +{ > + return a == 'a' || a == 'E'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i + 256) != g (i + 256)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c > b/gcc/testsuite/c-c++-common/pr103798-3.c > new file mode 100644 > index 00000000000..ddcedc7e238 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-3.c > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int > +f (char a) > +{ > + return __builtin_memchr ("aEgZ", a, 3) == 0; > +} > + > +__attribute__ ((weak)) > +int > +g (char a) > +{ > + return a != 'a' && a != 'E' && a != 'g'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-4.c > b/gcc/testsuite/c-c++-common/pr103798-4.c > new file mode 100644 > index 00000000000..00e8302a833 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-4.c > @@ -0,0 +1,28 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int > +f (char a) > +{ > + return __builtin_memchr ("aEgi", a, 4) != 0; > +} > + > +__attribute__ ((weak)) > +int > +g (char a) > +{ > + return a == 'a' || a == 'E' || a == 'g' || a == 'i'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-5.c > b/gcc/testsuite/c-c++-common/pr103798-5.c > new file mode 100644 > index 00000000000..0d6487a13df > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-5.c > @@ -0,0 +1,26 @@ > +/* { dg-do run { target int128 } } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int f(char a) > +{ > + return __builtin_memchr ("aEgiH", a, 5) == 0; > +} > + > +__attribute__ ((weak)) > +int g(char a) > +{ > + return a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H'; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-6.c > b/gcc/testsuite/c-c++-common/pr103798-6.c > new file mode 100644 > index 00000000000..5ccb5ee66e0 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-6.c > @@ -0,0 +1,27 @@ > +/* { dg-do run { target int128 } } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int f(char a) > +{ > + return __builtin_memchr ("aEgiHx", a, 6) != 0; > +} > + > +__attribute__ ((weak)) > +int g(char a) > +{ > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > + || a == 'x'); > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-7.c > b/gcc/testsuite/c-c++-common/pr103798-7.c > new file mode 100644 > index 00000000000..40fd38257d1 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-7.c > @@ -0,0 +1,27 @@ > +/* { dg-do run { target int128 } } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int f(char a) > +{ > + return __builtin_memchr ("aEgiHjZ", a, 7) == 0; > +} > + > +__attribute__ ((weak)) > +int g(char a) > +{ > + return (a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H' > + && a != 'j' && a != 'Z'); > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i) != g (i)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/testsuite/c-c++-common/pr103798-8.c > b/gcc/testsuite/c-c++-common/pr103798-8.c > new file mode 100644 > index 00000000000..0841b18cea4 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/pr103798-8.c > @@ -0,0 +1,27 @@ > +/* { dg-do run { target int128 } } */ > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > + > +__attribute__ ((weak)) > +int f(int a) > +{ > + return __builtin_memchr ("aEgiHx19ABC", a, 8) != 0; > +} > + > +__attribute__ ((weak)) > +int g(char a) > +{ > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > + || a == 'x' || a == '1' || a == '9'); > +} > + > +int > +main () > +{ > + for (int i = 0; i < 255; i++) > + if (f (i + 256) != g (i + 256)) > + __builtin_abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-assembler-not "memchr" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 69567ab3275..edd1277c23b 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-dfa.h" > #include "tree-ssa-propagate.h" > #include "tree-ssa-dom.h" > +#include "tree-ssa-strlen.h" > #include "builtins.h" > #include "tree-cfgcleanup.h" > #include "cfganal.h" > @@ -1177,6 +1178,15 @@ constant_pointer_difference (tree p1, tree p2) > memcpy (p, "abcd ", 7); > call if the latter can be stored by pieces during expansion. > > + Optimize > + memchr ("abcd", a, 4) == 0; > + or > + memchr ("abcd", a, 4) != 0; > + to > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 > + or > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') != 0 > + > Also canonicalize __atomic_fetch_op (p, x, y) op x > to __atomic_op_fetch (p, x, y) or > __atomic_op_fetch (p, x, y) iop x > @@ -1193,8 +1203,62 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, > tree callee2) > return false; > stmt1 = SSA_NAME_DEF_STMT (vuse); > > + tree res; > + > switch (DECL_FUNCTION_CODE (callee2)) > { > + case BUILT_IN_MEMCHR: > + if (gimple_call_num_args (stmt2) == 3 > + && (res = gimple_call_lhs (stmt2)) != nullptr > + && use_in_zero_equality (res) != nullptr > + && CHAR_BIT == 8 > + && BITS_PER_UNIT == 8) > + { > + tree ptr = gimple_call_arg (stmt2, 0); > + if (TREE_CODE (ptr) != ADDR_EXPR > + || TREE_CODE (TREE_OPERAND (ptr, 0)) != STRING_CST) > + break; > + unsigned HOST_WIDE_INT slen > + = TREE_STRING_LENGTH (TREE_OPERAND (ptr, 0)); > + /* It must be a non-empty string constant. */ > + if (slen < 2) > + break; > + tree size = gimple_call_arg (stmt2, 2); > + /* Size must be a constant which is <= UNITS_PER_WORD and > + <= the string length. */ > + if (TREE_CODE (size) != INTEGER_CST > + || integer_zerop (size) > + || wi::gtu_p (wi::to_wide (size), UNITS_PER_WORD) > + || wi::geu_p (wi::to_wide (size), slen))
it might be convenient to check tree_fits_uhwi_p (size) and do unsigned HOST_WIDE_INT sz = tree_to_uhwi (size); instead of playing with wide-int here. > + break; > + tree ch = gimple_call_arg (stmt2, 1); > + location_t loc = gimple_location (stmt2); > + if (!useless_type_conversion_p (char_type_node, > + TREE_TYPE (ch))) > + ch = fold_convert_loc (loc, char_type_node, ch); > + const char *p = TREE_STRING_POINTER (TREE_OPERAND (ptr, 0)); > + unsigned int isize = TREE_INT_CST_LOW (size); > + tree *op = new tree[isize]; > + for (unsigned int i = 0; i < isize; i++) > + { > + op[i] = build_int_cst (char_type_node, p[i]); > + op[i] = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, > + op[i], ch); > + } > + for (unsigned int i = isize - 1; i >= 1; i--) > + op[i - 1] = fold_convert_loc (loc, boolean_type_node, > + fold_build2_loc (loc, > + BIT_IOR_EXPR, > + boolean_type_node, > + op[i - 1], > + op[i])); > + res = fold_convert_loc (loc, TREE_TYPE (res), op[0]); > + gimplify_and_update_call_from_tree (gsi_p, res); my worry about -Os remains, can you please add a check like optimize_bb_for_speed (gimple_bb (stmt2)) || sz <= 2 (so we allow inline expanding two comparisons when not optimizing for speed)? For UNITS_PER_WORD == 8 this is possibly a lot of compares, don't we have some _BY_PIECEs limit we can use here? There's no COMPARE_RATIO it seems, not sure what we use there. Since we only inline expand memchr(..) != 0 we're leaving 'a' != x | 'b' != x | ... optimization to possibly 'ab...' != (x|x<<8|x<<12|...) for followup (I'm quite sure we don't do anything like that). The x "splat" needs log2 (sz) operations (but they might be slow?), but in the end it might pay off for power-of-two (sz) (one could even do this in chunks)? Anyway, this might be for a followup. Otherwise it looks OK. Thanks, Richard. > + delete[] op; > + return true; > + } > + break; > + > case BUILT_IN_MEMSET: > if (gimple_call_num_args (stmt2) != 3 > || gimple_call_lhs (stmt2) > diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc > index 7b3e3899ea2..5afbae1b72e 100644 > --- a/gcc/tree-ssa-strlen.cc > +++ b/gcc/tree-ssa-strlen.cc > @@ -3913,8 +3913,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write) > nonnull if and only RES is used in such expressions exclusively and > in none other. */ > > -static gimple * > -use_in_zero_equality (tree res, bool exclusive = true) > +gimple * > +use_in_zero_equality (tree res, bool exclusive) > { > gimple *first_use = NULL; > > diff --git a/gcc/tree-ssa-strlen.h b/gcc/tree-ssa-strlen.h > index 8d155450db8..fdb4d9d7783 100644 > --- a/gcc/tree-ssa-strlen.h > +++ b/gcc/tree-ssa-strlen.h > @@ -35,6 +35,8 @@ struct c_strlen_data; > extern void get_range_strlen_dynamic (tree, gimple *, c_strlen_data *, > pointer_query &); > > +extern gimple *use_in_zero_equality (tree, bool = true); > + > /* APIs internal to strlen pass. Defined in gimple-ssa-sprintf.cc. */ > extern bool handle_printf_call (gimple_stmt_iterator *, pointer_query &); > > -- > 2.36.1 >