Hi!

The testcase in the following patches is miscompiled, because
ICF only uses operand_equal_p to compare operands and when that
sees an ADDR_EXPR, it turns on OEP_ADDRESS_OF mode which only cares
about the exact value instead of checking the types etc.

Unfortunately, get_range_strlen/get_range_strlen_tree (and perhaps
other spots; maybe objsz too, though that should at least have some
stuff caught during objsz1 before IPA) actually derive information
from the ADDR_EXPR operand, so if we have say
  _1 = &this_5(D)->a;
in two functions and this_5 points to in one case to struct with
char a[11]; member at offset 0 and in another case with
char a[128]; at offset 0, those functions argue the maximum length
of the string pointed by _1 is 10 chars + nul or 127 chars + nul
in those cases.

Arguably that is a ticking bomb, I would imagine if SCCVN sees
two such pointers in one function that it will just CSE those and
whatever was the first one wins (at least after IPA), I think we've
repeatedly told the author of that code not to do that.

Anyway, the following patches attempt to just punt in ICF so that
this information is preserved.

I've done 3 x86_64-linux and i686-linux bootstraps/regtests, each time
with the 3rd patch to gather statistics on number of successful ICF function
merges, and once with no further patches, once with the first patch and
once with the second patch instead of the first.

The numbers of successful ICF function merges across the 2
bootstraps/regtests are
vanilla 175170
first   168617
second  168858
So, the second patch causes slightly more successful ICF merges over the
first one, but only tiny bit, for the first patch it is ~3.75% fewer
ICF merges, for the second patch ~3.6% fewer ICF merges.

Guess another option would be to somehow try to be conservative about such
cases, but for ICF that sounds really hard, how do we figure out that we
need to adjust something in the chosen candidate and what exactly in it.
And for SCCVN how to arrange to modify the chosen winner so that it is
conservatively ok for both the merged cases.

        Jakub
2025-02-27  Jakub Jelinek  <ja...@redhat.com>

        PR ipa/119006
        * ipa-icf-gimple.cc (func_checker::compare_operand): If t1 and t2
        are ADDR_EXPRs, call operand_equal_p on their operands rather than on
        the ADDR_EXPRs themselves.  Formatting fix.

        * g++.dg/opt/pr119006.C: New test.

--- gcc/ipa-icf-gimple.cc.jj    2025-02-01 00:50:02.080774328 +0100
+++ gcc/ipa-icf-gimple.cc       2025-02-27 14:35:19.931183246 +0100
@@ -437,12 +437,23 @@ func_checker::compare_operand (tree t1,
                 ("compare_ao_refs failed (dependence clique difference)");
       gcc_unreachable ();
     }
+  else if (TREE_CODE (t1) == ADDR_EXPR && TREE_CODE (t2) == ADDR_EXPR)
+    {
+      /* For ADDR_EXPR compare the operands of the ADDR_EXPR rather than
+        the ADDR_EXPRs themselves.  operand_equal_p will compare the
+        operands with OEP_ADDRESS_OF and only care about the value
+        of the ADDR_EXPR, rather than e.g. types of MEM_REFs in there.
+        Some optimizations use such details though, see PR119006.  */
+      if (operand_equal_p (TREE_OPERAND (t1, 0), TREE_OPERAND (t2, 0),
+                          OEP_MATCH_SIDE_EFFECTS))
+       return true;
+      return return_false_with_msg ("operand_equal_p failed");
+    }
   else
     {
       if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
        return true;
-      return return_false_with_msg
-                ("operand_equal_p failed");
+      return return_false_with_msg ("operand_equal_p failed");
     }
 }
 
--- gcc/testsuite/g++.dg/opt/pr119006.C.jj      2025-02-27 14:37:05.952707350 
+0100
+++ gcc/testsuite/g++.dg/opt/pr119006.C 2025-02-27 14:36:29.251218260 +0100
@@ -0,0 +1,36 @@
+// PR ipa/119006
+// { dg-do run { target c++11 } }
+// { dg-options "-O2 -fwhole-program" }
+
+struct A {
+  bool operator== (const char *x) const { return x && !__builtin_strcmp (a, 
x); }
+  char a[11];
+};
+
+struct B {
+  bool operator== (const char *x) const { return x && !__builtin_strcmp (a, 
x); }
+  bool operator!= (const char *x) const { return !(*this == x); }
+  char a[128];
+};
+
+[[gnu::noinline,gnu::used]] int
+foo (const A& lhs, const char* rhs)
+{
+  return lhs == rhs;
+}
+
+constexpr const char *t = "abcdefghijklmno";
+
+[[gnu::noinline,gnu::used]] void
+bar (B x)
+{
+  if (x != t) __builtin_abort ();
+}
+
+int
+main ()
+{
+  B b;
+  __builtin_strcpy (b.a, t);
+  bar (b);
+}
2025-02-27  Jakub Jelinek  <ja...@redhat.com>

        PR ipa/119006
        * ipa-icf-gimple.cc (func_checker::compare_operand): If t1 and t2
        are ADDR_EXPRs, call operand_equal_p on their operands rather than on
        the ADDR_EXPRs themselves.  Formatting fix.

        * g++.dg/opt/pr119006.C: New test.

--- gcc/ipa-icf-gimple.cc.jj    2025-02-01 00:50:02.080774328 +0100
+++ gcc/ipa-icf-gimple.cc       2025-02-27 14:24:20.815358621 +0100
@@ -440,9 +440,39 @@ func_checker::compare_operand (tree t1,
   else
     {
       if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
-       return true;
-      return return_false_with_msg
-                ("operand_equal_p failed");
+       {
+         if (TREE_CODE (t1) == ADDR_EXPR && TREE_CODE (t2) == ADDR_EXPR)
+           {
+             /* For ADDR_EXPR operand_equal_p compares the operands of
+                it using OEP_ADDRESS_OF and only care about the value
+                of the ADDR_EXPR, rather than e.g. types of MEM_REFs in
+                there.  Some optimizations use such details though, see
+                PR119006.  */
+             tree base1 = TREE_OPERAND (t1, 0);
+             tree base2 = TREE_OPERAND (t2, 0);
+             do
+               {
+                 while (handled_component_p (base1))
+                   base1 = TREE_OPERAND (base1, 0);
+                 while (handled_component_p (base2))
+                   base2 = TREE_OPERAND (base2, 0);
+                 if (!compatible_types_p (TREE_TYPE (base1),
+                                          TREE_TYPE (base2)))
+                   return return_false_with_msg ("ADDR_EXPR base type "
+                                                 "difference");
+                 if (TREE_CODE (base1) != MEM_REF
+                     || TREE_CODE (TREE_OPERAND (base1, 0)) != ADDR_EXPR
+                     || TREE_CODE (base2) != MEM_REF
+                     || TREE_CODE (TREE_OPERAND (base2, 0)) != ADDR_EXPR)
+                   break;
+                 base1 = TREE_OPERAND (TREE_OPERAND (base1, 0), 0);
+                 base2 = TREE_OPERAND (TREE_OPERAND (base2, 0), 0);
+               }
+             while (1);
+           }
+         return true;
+       }
+      return return_false_with_msg ("operand_equal_p failed");
     }
 }
--- gcc/testsuite/g++.dg/opt/pr119006.C.jj      2025-02-27 14:37:05.952707350 
+0100
+++ gcc/testsuite/g++.dg/opt/pr119006.C 2025-02-27 14:36:29.251218260 +0100
@@ -0,0 +1,36 @@
+// PR ipa/119006
+// { dg-do run { target c++11 } }
+// { dg-options "-O2 -fwhole-program" }
+
+struct A {
+  bool operator== (const char *x) const { return x && !__builtin_strcmp (a, 
x); }
+  char a[11];
+};
+
+struct B {
+  bool operator== (const char *x) const { return x && !__builtin_strcmp (a, 
x); }
+  bool operator!= (const char *x) const { return !(*this == x); }
+  char a[128];
+};
+
+[[gnu::noinline,gnu::used]] int
+foo (const A& lhs, const char* rhs)
+{
+  return lhs == rhs;
+}
+
+constexpr const char *t = "abcdefghijklmno";
+
+[[gnu::noinline,gnu::used]] void
+bar (B x)
+{
+  if (x != t) __builtin_abort ();
+}
+
+int
+main ()
+{
+  B b;
+  __builtin_strcpy (b.a, t);
+  bar (b);
+}
--- gcc/ipa-icf.cc.jj   2025-01-02 20:54:32.260128108 +0100
+++ gcc/ipa-icf.cc      2025-02-27 17:50:27.071053295 +0100
@@ -3414,6 +3414,7 @@ sem_item_optimizer::merge_classes (unsig
     }
 
   unsigned int l;
+  unsigned int nfuncmerges = 0;
   std::pair<congruence_class_group *, int> *it;
   FOR_EACH_VEC_ELT (classes, l, it)
     for (unsigned int i = 0; i < it->first->classes.length (); i++)
@@ -3479,6 +3480,8 @@ sem_item_optimizer::merge_classes (unsig
                    symtab_pair p = symtab_pair (source->node, alias->node);
                    m_merged_variables.safe_push (p);
                  }
+               if (merged && alias->type == FUNC)
+                 nfuncmerges++;
              }
          }
 
@@ -3515,6 +3518,12 @@ sem_item_optimizer::merge_classes (unsig
   if (!m_merged_variables.is_empty ())
     fixup_points_to_sets ();
 
+  if (nfuncmerges)
+    {
+      FILE *f = fopen ("/tmp/icf", "a");
+      fprintf (f, "%d %s %d\n", (int) BITS_PER_WORD, main_input_filename ? 
main_input_filename : "-", nfuncmerges);
+      fclose (f);
+    }
   return merged_p;
 }
 

Reply via email to