On Fri, Aug 29, 2025 at 6:33 AM Andrew Pinski <andrew.pin...@oss.qualcomm.com> wrote: > > To better optimize code dealing with `memcmp == 0` where we have > a small constant size, we can inline the memcmp in those cases. > There is code to do this in strlen but that is run too late in > the case where we can figure out the value of one of the arguments > to memcmp. So this copies the optimization to forwprop. > > An example of where this helps is: > ``` > bool cmpvect(const std::vector<int> &a) { return a == std::vector<int>{10}; } > ``` > > Where the above should be optimized to just `return a.size() == 1 && a[0] == > 10;`. > > Note pr44130.c testcase needed to change as now it will be optimized away > otherwise. > Note the loop in pr44130.c is also vectorized which it was not before. > > Note the optimization remains in strlen as the other part (memcmp -> > memcmp_eq) > should move to either isel or fab and I didn't want to remove it just yet. > > Bootstrapped and tested on x86_64-linux-gnu. > > PR tree-optimization/116651 > > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (simplify_builtin_memcmp): New function. > (simplify_builtin_call): Call simplify_builtin_memcmp for memcmp > memcmp_eq builtins. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr44130.c: Add an inline-asm clobber. > * g++.dg/tree-ssa/vector-compare-1.C: New test. 
> > Signed-off-by: Andrew Pinski <andrew.pin...@oss.qualcomm.com> > --- > .../g++.dg/tree-ssa/vector-compare-1.C | 24 ++++++++ > gcc/testsuite/gcc.target/i386/pr44130.c | 2 + > gcc/tree-ssa-forwprop.cc | 57 +++++++++++++++++++ > 3 files changed, 83 insertions(+) > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > new file mode 100644 > index 00000000000..d9b2bc2533e > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C > @@ -0,0 +1,24 @@ > +// { dg-do compile { target c++11 } } > +// { dg-options "-O2 -fdump-tree-optimized" } > + > +// PR tree-optimization/116651 > + > +#include <vector> > + > +bool test1(const std::vector<int>& in) { > + return in == std::vector<int>{24}; > +} > + > +/* We should be to optimize this to: > + int *b = in.bptr; > + int *e = in.eptr; > + auto size = e - b; > + if (size != 4) > + return false; > + int v = *b; > + return v == 24; > + > +*/ > + > +// { dg-final { scan-tree-dump-times "== 24" 1 "optimized" } } */ > +// { dg-final { scan-tree-dump-times "== 4" 1 "optimized" { target int32 } > } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr44130.c > b/gcc/testsuite/gcc.target/i386/pr44130.c > index 2ad740993c1..6269dc89f5e 100644 > --- a/gcc/testsuite/gcc.target/i386/pr44130.c > +++ b/gcc/testsuite/gcc.target/i386/pr44130.c > @@ -21,6 +21,8 @@ void testf (void) > xxxxx[5] = __builtin_copysignf (-0.0, Yf[5]); > xxxxx[6] = __builtin_copysignf (__builtin_inff (), Yf[6]); > xxxxx[7] = __builtin_copysignf (-__builtin_nanf (""), Yf[7]); > + > + asm("":"=m"(xxxxx)); > for (i = 0; i < 8; ++i) > if (__builtin_memcmp (xxxxx+i, Zf+i, sizeof(float)) != 0) > abort (); > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 447004ef048..f5cd3a8f390 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -1593,6 +1593,60 @@ optimize_agr_copyprop 
(gimple_stmt_iterator *gsip) > return changed; > } > > +/* Optimizes builtin memcmps for small constant sizes. > + GSI_P is the GSI for the call. STMT is the call itself. > + */ > + > +static bool > +simplify_builtin_memcmp (gimple_stmt_iterator *gsi_p, gcall *stmt) > +{ > + tree res = gimple_call_lhs (stmt); > + > + if (!res || !use_in_zero_equality (res)) > + return false; > + tree arg1 = gimple_call_arg (stmt, 0); > + tree arg2 = gimple_call_arg (stmt, 1); > + tree len = gimple_call_arg (stmt, 2);
You need to check that the expected number of arguments is actually present and that arg1/arg2 are pointers. Otherwise looks OK to me. Thanks, Richard. > + unsigned HOST_WIDE_INT leni; > + > + if (tree_fits_uhwi_p (len) > + && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode) > + && pow2p_hwi (leni)) > + { > + leni *= CHAR_TYPE_SIZE; > + unsigned align1 = get_pointer_alignment (arg1); > + unsigned align2 = get_pointer_alignment (arg2); > + unsigned align = MIN (align1, align2); > + scalar_int_mode mode; > + if (int_mode_for_size (leni, 1).exists (&mode) > + && (align >= leni || !targetm.slow_unaligned_access (mode, align))) > + { > + location_t loc = gimple_location (stmt); > + tree type, off; > + type = build_nonstandard_integer_type (leni, 1); > + gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni)); > + tree ptrtype = build_pointer_type_for_mode (char_type_node, > + ptr_mode, true); > + off = build_int_cst (ptrtype, 0); > + arg1 = build2_loc (loc, MEM_REF, type, arg1, off); > + arg2 = build2_loc (loc, MEM_REF, type, arg2, off); > + tree tem1 = fold_const_aggregate_ref (arg1); > + if (tem1) > + arg1 = tem1; > + tree tem2 = fold_const_aggregate_ref (arg2); > + if (tem2) > + arg2 = tem2; > + res = fold_convert_loc (loc, TREE_TYPE (res), > + fold_build2_loc (loc, NE_EXPR, > + boolean_type_node, > + arg1, arg2)); > + gimplify_and_update_call_from_tree (gsi_p, res); > + return true; > + } > + } > + return false; > +} > + > /* *GSI_P is a GIMPLE_CALL to a builtin function. > Optimize > memcpy (p, "abcd", 4); > @@ -1630,6 +1684,9 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, > tree callee2, bool full_walk > > switch (DECL_FUNCTION_CODE (callee2)) > { > + case BUILT_IN_MEMCMP: > + case BUILT_IN_MEMCMP_EQ: > + return simplify_builtin_memcmp (gsi_p, as_a<gcall*>(stmt2)); > case BUILT_IN_MEMCHR: > if (gimple_call_num_args (stmt2) == 3 > && (res = gimple_call_lhs (stmt2)) != nullptr > -- > 2.43.0 >