On Fri, Aug 29, 2025 at 6:33 AM Andrew Pinski
<andrew.pin...@oss.qualcomm.com> wrote:
>
> To better optimize code dealing with `memcmp == 0` where we have
> a small constant size, we can inline the memcmp in those cases.
> There is code to do this in strlen but that is run too late in
> the case where we can figure out the value of one of the arguments
> to memcmp. So this copies the optimization to forwprop.
>
> An example of where this helps is:
> ```
> bool cmpvect(const std::vector<int> &a) { return a == std::vector<int>{10}; }
> ```
>
> Where the above should be optimized to just `return a.size() == 1 && a[0] == 
> 10;`.
>
> Note the pr44130.c testcase needed to change, as otherwise it would now be
> optimized away.
> Note the loop in pr44130.c is also vectorized, which it was not before.
>
> Note the optimization remains in strlen as the other part (memcmp -> 
> memcmp_eq)
> should move to either isel or fab and I didn't want to remove it just yet.
>
> Bootstrapped and tested on x86_64-linux-gnu.
>
>         PR tree-optimization/116651
>
> gcc/ChangeLog:
>
>         * tree-ssa-forwprop.cc (simplify_builtin_memcmp): New function.
>         (simplify_builtin_call): Call simplify_builtin_memcmp for memcmp
>         and memcmp_eq builtins.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr44130.c: Add an inline-asm clobber.
>         * g++.dg/tree-ssa/vector-compare-1.C: New test.
>
> Signed-off-by: Andrew Pinski <andrew.pin...@oss.qualcomm.com>
> ---
>  .../g++.dg/tree-ssa/vector-compare-1.C        | 24 ++++++++
>  gcc/testsuite/gcc.target/i386/pr44130.c       |  2 +
>  gcc/tree-ssa-forwprop.cc                      | 57 +++++++++++++++++++
>  3 files changed, 83 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
>
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C 
> b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
> new file mode 100644
> index 00000000000..d9b2bc2533e
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
> @@ -0,0 +1,24 @@
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-O2 -fdump-tree-optimized" }
> +
> +// PR tree-optimization/116651
> +
> +#include <vector>
> +
> +bool test1(const std::vector<int>& in) {
> +    return in == std::vector<int>{24};
> +}
> +
> +/* We should be able to optimize this to:
> +   int *b = in.bptr;
> +   int *e = in.eptr;
> +   auto size = e - b;
> +   if (size != 4)
> +     return false;
> +   int v = *b;
> +   return v == 24;
> +
> +*/
> +
> +// { dg-final { scan-tree-dump-times "== 24" 1 "optimized" } } */
> +// { dg-final { scan-tree-dump-times "== 4" 1 "optimized"  { target int32 } 
> } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr44130.c 
> b/gcc/testsuite/gcc.target/i386/pr44130.c
> index 2ad740993c1..6269dc89f5e 100644
> --- a/gcc/testsuite/gcc.target/i386/pr44130.c
> +++ b/gcc/testsuite/gcc.target/i386/pr44130.c
> @@ -21,6 +21,8 @@ void testf (void)
>    xxxxx[5] = __builtin_copysignf (-0.0, Yf[5]);
>    xxxxx[6] = __builtin_copysignf (__builtin_inff (), Yf[6]);
>    xxxxx[7] = __builtin_copysignf (-__builtin_nanf (""), Yf[7]);
> +
> +  asm("":"=m"(xxxxx));
>    for (i = 0; i < 8; ++i)
>      if (__builtin_memcmp (xxxxx+i, Zf+i, sizeof(float)) != 0)
>        abort ();
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 447004ef048..f5cd3a8f390 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1593,6 +1593,60 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
>    return changed;
>  }
>
> +/* Optimizes builtin memcmps for small constant sizes.
> +   GSI_P is the GSI for the call. STMT is the call itself.
> +   */
> +
> +static bool
> +simplify_builtin_memcmp (gimple_stmt_iterator *gsi_p, gcall *stmt)
> +{
> +  tree res = gimple_call_lhs (stmt);
> +
> +  if (!res || !use_in_zero_equality (res))
> +    return false;
> +  tree arg1 = gimple_call_arg (stmt, 0);
> +  tree arg2 = gimple_call_arg (stmt, 1);
> +  tree len = gimple_call_arg (stmt, 2);

You need to check that the expected number of arguments is actually present
and that arg1/arg2 are pointers.

Otherwise looks OK to me.

Thanks,
Richard.

> +  unsigned HOST_WIDE_INT leni;
> +
> +  if (tree_fits_uhwi_p (len)
> +      && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
> +      && pow2p_hwi (leni))
> +    {
> +      leni *= CHAR_TYPE_SIZE;
> +      unsigned align1 = get_pointer_alignment (arg1);
> +      unsigned align2 = get_pointer_alignment (arg2);
> +      unsigned align = MIN (align1, align2);
> +      scalar_int_mode mode;
> +      if (int_mode_for_size (leni, 1).exists (&mode)
> +         && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
> +       {
> +         location_t loc = gimple_location (stmt);
> +         tree type, off;
> +         type = build_nonstandard_integer_type (leni, 1);
> +         gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
> +         tree ptrtype = build_pointer_type_for_mode (char_type_node,
> +                                                     ptr_mode, true);
> +         off = build_int_cst (ptrtype, 0);
> +         arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
> +         arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
> +         tree tem1 = fold_const_aggregate_ref (arg1);
> +         if (tem1)
> +           arg1 = tem1;
> +         tree tem2 = fold_const_aggregate_ref (arg2);
> +         if (tem2)
> +           arg2 = tem2;
> +         res = fold_convert_loc (loc, TREE_TYPE (res),
> +                                 fold_build2_loc (loc, NE_EXPR,
> +                                                  boolean_type_node,
> +                                                  arg1, arg2));
> +         gimplify_and_update_call_from_tree (gsi_p, res);
> +         return true;
> +       }
> +    }
> +  return false;
> +}
> +
>  /* *GSI_P is a GIMPLE_CALL to a builtin function.
>     Optimize
>     memcpy (p, "abcd", 4);
> @@ -1630,6 +1684,9 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, 
> tree callee2, bool full_walk
>
>    switch (DECL_FUNCTION_CODE (callee2))
>      {
> +    case BUILT_IN_MEMCMP:
> +    case BUILT_IN_MEMCMP_EQ:
> +      return simplify_builtin_memcmp (gsi_p, as_a<gcall*>(stmt2));
>      case BUILT_IN_MEMCHR:
>        if (gimple_call_num_args (stmt2) == 3
>           && (res = gimple_call_lhs (stmt2)) != nullptr
> --
> 2.43.0
>

Reply via email to