On 9 December 2016 at 17:59, Jakub Jelinek <ja...@redhat.com> wrote:
> On Fri, Dec 09, 2016 at 05:36:41PM +0530, Prathamesh Kulkarni wrote:
>> --- a/gcc/tree-ssa-strlen.c
>> +++ b/gcc/tree-ssa-strlen.c
>> @@ -2302,7 +2302,81 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
>>         else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
>>           handle_pointer_plus (gsi);
>>       }
>> -      else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
>> +
>> +      /* Fold strstr (s, t) == s to memcmp (s, t, strlen (t)) == 0.
>> +      if strlen (t) is known and var holding return value of strstr
>> +      has single use.  */
>> +
>> +      else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (TREE_TYPE 
>> (lhs)))
>> +     {
>> +       enum tree_code code = gimple_assign_rhs_code (stmt);
>> +       if (code == EQ_EXPR || code == NE_EXPR)
>
> This way you handle _8 = _5 == _7;, but not if (_5 == _7) bar ();.  Shouldn't 
> you
> also handle GIMPLE_COND similarly (of course, the rhs1 and rhs2 grabbing
> and code grabbing is different for GIMPLE_COND.  But the rest should be
> the same, except again that you don't want to replace the GIMPLE_COND, but
> adjust it.  Maybe also COND_EXPR in gimple_assign (_9 = _5 == _7 ? _10 : 
> _11;).
>
>> +         {
>> +           tree rhs1 = gimple_assign_rhs1 (stmt);
>> +           tree rhs2 = gimple_assign_rhs2 (stmt);
>> +           if (TREE_CODE (rhs1) == SSA_NAME
>> +               && TREE_CODE (rhs2) == SSA_NAME)
>> +             {
>> +               gcall *call_stmt = dyn_cast<gcall *> (SSA_NAME_DEF_STMT 
>> (rhs1));
>> +               if (!call_stmt)
>> +                 {
>> +                   call_stmt = dyn_cast<gcall *> (SSA_NAME_DEF_STMT (rhs2));
>> +                   tree tmp = rhs1;
>> +                   rhs1 = rhs2;
>> +                   rhs2 = tmp;
>
> We use std::swap (rhs1, rhs2); in this case these days.
>
>> +                 }
>> +
>> +               tree call_lhs;
>> +               if (call_stmt
>> +                   && gimple_call_builtin_p (call_stmt, BUILT_IN_STRSTR)
>> +                   && (call_lhs = gimple_call_lhs (call_stmt))
>> +                   && has_single_use (call_lhs))
>
> This might not optimize if you have:
>   _5 = foo ();
>   _7 = __builtin_strstr (_5, "abcd");
>   _8 = _5 == _7;
>
> Or even you could have:
>   _5 = __builtin_strstr (...);
>   _7 = __builtin_strstr (_5, "abcd");
>   _8 = _5 == _7;
>
> So I wonder if you shouldn't do:
>                   gimple *call_stmt = NULL;
>                   for (int pass = 0; pass < 2; pass++)
>                     {
>                       gimple *g = SSA_NAME_DEF_STMT (rhs1);
>                       if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
>                           && gimple_call_lhs (g) == rhs1
>                           && has_single_use (rhs1)
>                           && gimple_call_arg (g, 0) == rhs2)
>                         {
>                           call_stmt = g;
>                           break;
>                         }
>                       std::swap (rhs1, rhs2);
>                     }
>                   if (call_stmt)
> ...
>
> I think you don't need operand_equal_p, because SSA_NAMEs should just
> be the same pointer if they are the same thing.
> The above way you handle both orderings.  Perhaps also it is big enough to
> be done in a separate function, which you call with the code/rhs1/rhs2 and
> stmt for the EQ/NE_EXPR is_gimple_assign as well as for COND_EXPR and
> GIMPLE_COND.
Hi Jakub,
Thanks for the suggestions. It didn't occur to me to check for gimple_cond.
I have tried to do the changes in the attached version.
I am not sure if I have handled cond_expr correctly.
IIUC, if gimple_assign has code cond_expr, then the condition is
stored in gimple_assign_rhs1,
however it's not a single operand but a tree of the form "op1 cond_code op2".
Is that correct ?

However I am not able to write a test-case that generates cond_expr in the IL.
I tried:
t1 = strstr (s, t);
(t1 == s)  ? foo() : bar ();
and other such variants but it seems the ?: operator is getting
lowered to gimple_cond instead.

Bootstrap+tested on x86_64-unknown-linux-gnu
and cross-tested on arm*-*-*, aarch64*-*-*.
Does it look OK ?

Thanks,
Prathamesh
>
>         Jakub
2016-12-13  Jakub Jelinek  <ja...@redhat.com>
            Prathamesh Kulkarni  <prathamesh.kulka...@linaro.org>

        * tree-ssa-strlen.c (fold_strstr_to_memcmp): New function.
        (strlen_optimize_stmt): Call fold_strstr_to_memcmp.

testsuite/
        * gcc.dg/strlenopt-30.c: New test-case.

diff --git a/gcc/testsuite/gcc.dg/strlenopt-30.c 
b/gcc/testsuite/gcc.dg/strlenopt-30.c
new file mode 100644
index 0000000..089b3a2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-30.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+__attribute__((no_icf))
+_Bool f1(char *s)
+{
+  return __builtin_strstr (s, "hello") == s;
+}
+
+__attribute__((no_icf))
+_Bool f2(char *s)
+{
+  return s == __builtin_strstr (s, "hello");
+}
+
+__attribute__((no_icf))
+_Bool f3(char *s)
+{
+  return s != __builtin_strstr (s, "hello");
+}
+
+__attribute__((no_icf))
+_Bool f4()
+{
+  char *foo_f4(void);
+  char *t1 = foo_f4();
+  char *t2 = __builtin_strstr (t1, "hello");
+  _Bool t3 = t2 == t1;
+  return t3;
+}
+
+__attribute__((no_icf))
+void f5(char *s)
+{
+  char *t1 = __builtin_strstr (s, "hello");
+  void foo_f5(void);
+  if (t1 != s)
+    foo_f5();
+}
+
+/* Do not perform transform, since strlen (t)
+   is unknown.  */
+
+__attribute__((no_icf))
+_Bool f6(char *s, char *t)
+{
+  return __builtin_strstr (s, t) == s;
+}
+
+/* Do not perform transform in this case, since
+   t1 doesn't have single use.  */
+
+__attribute__((no_icf))
+_Bool f7(char *s)
+{
+  void foo_f7(char *);
+
+  char *t1 = __builtin_strstr (s, "hello");
+  foo_f7 (t1);
+  return (t1 == s);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_memcmp" 5 "strlen" } } */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 339812e..a4a0ae1 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -2222,6 +2222,93 @@ handle_char_store (gimple_stmt_iterator *gsi)
   return true;
 }
 
+/* Try to fold strstr (s, t) == s to memcmp (s, t, strlen (t)) == 0.  */
+
+static void
+fold_strstr_to_memcmp(enum tree_code code, tree rhs1, tree rhs2, gimple *stmt)
+{
+  gimple *call_stmt = NULL;
+  for (int pass = 0; pass < 2; pass++)
+    {
+      gimple *g = SSA_NAME_DEF_STMT (rhs1);
+      if (g
+         && gimple_call_builtin_p (g, BUILT_IN_STRSTR)
+         && has_single_use (rhs1)
+         && gimple_call_arg (as_a<gcall *> (g), 0) == rhs2)
+       {
+         call_stmt = g;
+         break;
+       }
+      std::swap (rhs1, rhs2);
+    }
+
+  if (call_stmt)
+    {
+      tree arg0 = gimple_call_arg (call_stmt, 0);
+
+      if (arg0 == rhs2)
+       {
+         tree arg1 = gimple_call_arg (call_stmt, 1);
+         tree arg1_len = NULL_TREE;
+         int idx = get_stridx (arg1);
+
+         if (idx)
+           {
+             if (idx < 0)
+               arg1_len = build_int_cst (size_type_node, ~idx);
+             else
+               {
+                 strinfo *si = get_strinfo (idx);
+                 if (si)
+                   arg1_len = get_string_length (si);
+               }
+           }
+
+         if (arg1_len != NULL_TREE)
+           {
+             gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
+             tree memcmp_decl = builtin_decl_explicit (BUILT_IN_MEMCMP);
+             gcall *memcmp_call = gimple_build_call (memcmp_decl, 3,
+                                                     arg0, arg1, arg1_len);
+             tree memcmp_lhs = make_ssa_name (integer_type_node);
+             gimple_set_vuse (memcmp_call, gimple_vuse (call_stmt));
+             gimple_call_set_lhs (memcmp_call, memcmp_lhs);
+             gsi_remove (&gsi, true);
+             gsi_insert_before (&gsi, memcmp_call, GSI_SAME_STMT);
+             tree zero = build_zero_cst (TREE_TYPE (memcmp_lhs));
+
+             if (is_gimple_assign (stmt))
+               {
+                 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
+                   {
+                     tree cond = gimple_assign_rhs1 (stmt);
+                     TREE_SET_CODE (cond, EQ_EXPR);
+                     TREE_OPERAND (cond, 0) = memcmp_lhs;
+                     TREE_OPERAND (cond, 1) = zero;
+                     update_stmt (stmt);
+                   }
+                 else
+                   {
+                     gsi = gsi_for_stmt (stmt);
+                     tree lhs = gimple_assign_lhs (stmt);
+                     gassign *ga = gimple_build_assign (lhs, code, memcmp_lhs,
+                                                        zero);
+                     gsi_replace (&gsi, ga, false);
+                   }
+               }
+             else
+               {
+                 gcond *cond = as_a<gcond *> (stmt);
+                 gimple_cond_set_lhs (cond, memcmp_lhs);
+                 gimple_cond_set_rhs (cond, zero);
+                 gimple_cond_set_code (cond, EQ_EXPR);
+                 update_stmt (cond);
+               }
+           }
+       }
+    }
+}
+
 /* Attempt to optimize a single statement at *GSI using string length
    knowledge.  */
 
@@ -2302,7 +2389,34 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
          else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
            handle_pointer_plus (gsi);
        }
-      else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
+    else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+      {
+       enum tree_code code = gimple_assign_rhs_code (stmt);
+       if (code == COND_EXPR)
+         {
+           tree cond = gimple_assign_rhs1 (stmt);
+           enum tree_code cond_code = TREE_CODE (cond);
+
+           if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
+             {
+               tree rhs1 = TREE_OPERAND (cond, 0);
+               tree rhs2 = TREE_OPERAND (cond, 1);
+               if (TREE_CODE (rhs1) == SSA_NAME
+                   && TREE_CODE (rhs2) == SSA_NAME)
+                 fold_strstr_to_memcmp (cond_code, rhs1, rhs2, stmt);
+             }
+         }
+       else if (code == EQ_EXPR || code == NE_EXPR)
+         {
+           tree rhs1 = gimple_assign_rhs1 (stmt);
+           tree rhs2 = gimple_assign_rhs2 (stmt);
+
+           if (TREE_CODE (rhs1) == SSA_NAME
+               && TREE_CODE (rhs2) == SSA_NAME)
+             fold_strstr_to_memcmp (code, rhs1, rhs2, stmt);
+         }
+      }
+    else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
        {
          tree type = TREE_TYPE (lhs);
          if (TREE_CODE (type) == ARRAY_TYPE)
@@ -2316,6 +2430,17 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
            }
        }
     }
+  else if (gcond *cond = dyn_cast<gcond *> (stmt))
+    {
+      enum tree_code code = gimple_cond_code (cond);
+      tree lhs = gimple_cond_lhs (stmt);
+      tree rhs = gimple_cond_rhs (stmt);
+
+      if ((code == EQ_EXPR || code == NE_EXPR)
+         && TREE_CODE (lhs) == SSA_NAME
+         && TREE_CODE (rhs) == SSA_NAME)
+       fold_strstr_to_memcmp (code, lhs, rhs, stmt);
+    }
 
   if (gimple_vdef (stmt))
     maybe_invalidate (stmt);

Reply via email to