Hi!

On Thu, Dec 05, 2024 at 11:37:45AM +0100, Richard Biener wrote:
> VN again is the culprit for exploiting address equivalences before
> __builtin_object_size got the chance to do its job.  This time
> it isn't about union members but adjacent structure fields where
> an address to one after the last element of an array field can
> spill over to the next field.
> 
> The following protects all out-of-bound accesses on the upper bound
> side (singling out TYPE_MAX_VALUE + 1 is more expensive).  It
> ignores other out-of-bound addresses that would invoke UB.
> 
> Zero-sized arrays are a bit awkward because the C++ represents them
> with a -1U upper bound.
> 
> There's a similar issue for zero-sized components whose address can
> be the same as the adjacent field in C.

So, as I wrote earlier, I'd suggest dropping gcc.dg/torture/pr117912-3.c
from the patch and replacing it with the attached one (now tweaked
to use [[no_unique_address]] and be C++ compatible).

Generally, I'm really surprised all these routines only look at one
reference, don't compare two references (one earlier seen, another one
compared against that), which is where something like tree-object-size.cc
(addr_object_size) like checking could be done, but guess that just
shows how little I know about SCCVN.

The 3 hunks seem correct in what they attempt to do, zero sized components
are indeed a problem because they result in the same offset for perhaps
__bos POV incompatible access.  And similarly the out of bounds (invalid)
plus the boundary case (valid to have address taken, invalid to dereference
it).  And to my surprise (haven't studied how it actually works), if I
modify the testcases to have
  bar (&p->b[24]);
  bar (&p->b[24]);
  bar (&p->b[24]);
(in -1.c) or
  bar (&p->b);
  bar (&p->b);
  bar (&p->b);
(in -3.c), then fre1 still optimizes all the &p->b[24] to just one SSA_NAME
(or all the &p->b).

> --- a/gcc/tree-ssa-sccvn.cc
> +++ b/gcc/tree-ssa-sccvn.cc
> @@ -987,9 +987,12 @@ copy_reference_ops_from_ref (tree ref, 
> vec<vn_reference_op_s> *result)
>                        + (wi::to_offset (bit_offset) >> LOG2_BITS_PER_UNIT));
>                   /* Probibit value-numbering zero offset components

s/Probibit/Prohibit/

>                      of addresses the same before the pass folding
> -                    __builtin_object_size had a chance to run.  */
> +                    __builtin_object_size had a chance to run.  Likewise
> +                    for components of zero size at arbitrary offset.  */
>                   if (TREE_CODE (orig) != ADDR_EXPR
> -                     || maybe_ne (off, 0)
> +                     || (TYPE_SIZE (temp.type)
> +                         && !integer_zerop (TYPE_SIZE (temp.type))

Maybe && integer_nonzerop (TYPE_SIZE (temp.type)) instead?
I mean, if we have a VL structure
struct T { int b[n]; };
struct S { int a; struct T b; int c; };
then b could have zero size (if n == 0) or non-zero one.  Perhaps
it is punted on elsewhere and __builtin_object_size (&s->b, 1) and
__builtin_object_size (&s->c, 1) could be different.

> +                         && maybe_ne (off, 0))
>                       || (cfun->curr_properties & PROP_objsz))
>                     off.to_shwi (&temp.off);
>                 }
> @@ -1010,9 +1013,30 @@ copy_reference_ops_from_ref (tree ref, 
> vec<vn_reference_op_s> *result)
>           if (! temp.op2)
>             temp.op2 = size_binop (EXACT_DIV_EXPR, TYPE_SIZE_UNIT (eltype),
>                                    size_int (TYPE_ALIGN_UNIT (eltype)));
> +         /* Probibit value-numbering addresses of out-of-bound ARRAY_REFs

s/Probibit/Prohibit/ again

Plus am not sure if out-of-bound is the right term to use, in the test
b[24] is not out of bounds, it is one past the last element in the array
in C/C++ terms, which is out of bounds only for dereferences.
Perhaps calling the var oob is fine if the comment makes it clear that
it is about real out of bounds accesses as well as taking address of the
one past the last element in an array.

> +            the same as addresses of other components before the pass
> +            folding __builtin_object_size had a chance to run.  */
> +         bool avoid_oob = true;
> +         if (TREE_CODE (orig) != ADDR_EXPR
> +             || cfun->curr_properties & PROP_objsz)
> +           avoid_oob = false;
> +         else if (poly_int_tree_p (temp.op0))
> +           {
> +             tree ub = array_ref_up_bound (ref);
> +             if (ub
> +                 && poly_int_tree_p (ub)
> +                 /* ???  The C frontend for T[0] uses [0:] and the
> +                    C++ frontend [0:-1U].  See layout_type for how
> +                    awkward this is.  */
> +                 && !integer_minus_onep (ub)
> +                 && known_le (wi::to_poly_offset (temp.op0),
> +                              wi::to_poly_offset (ub)))
> +               avoid_oob = false;

I wonder if we shouldn't punt (aka have avoid_oob true) also on temp.op0 smaller
than array_ref_low_bound (ref).

> +           }
>           if (poly_int_tree_p (temp.op0)
>               && poly_int_tree_p (temp.op1)
> -             && TREE_CODE (temp.op2) == INTEGER_CST)
> +             && TREE_CODE (temp.op2) == INTEGER_CST
> +             && !avoid_oob)
>             {
>               poly_offset_int off = ((wi::to_poly_offset (temp.op0)
>                                       - wi::to_poly_offset (temp.op1))
> @@ -1754,6 +1778,23 @@ re_valueize:
>              && poly_int_tree_p (vro->op1)
>              && TREE_CODE (vro->op2) == INTEGER_CST)
>       {
> +       /* Probibit value-numbering addresses of out-of-bound ARRAY_REFs

Again Prohibit
Same concern about naming.

> +          the same as addresses of other components before the pass
> +          folding __builtin_object_size had a chance to run.  */
> +       if (!(cfun->curr_properties & PROP_objsz)
> +           && (*orig)[0].opcode == ADDR_EXPR)
> +         {
> +           tree dom = TYPE_DOMAIN ((*orig)[i + 1].type);
> +           if (!dom
> +               || !TYPE_MAX_VALUE (dom)
> +               || !poly_int_tree_p (TYPE_MAX_VALUE (dom))
> +               || integer_minus_onep (TYPE_MAX_VALUE (dom)))
> +             continue;
> +           if (!known_le (wi::to_poly_offset (vro->op0),
> +                          wi::to_poly_offset (TYPE_MAX_VALUE (dom))))
> +             continue;

And again wonder about low bound.

> +         }
> +
>         poly_offset_int off = ((wi::to_poly_offset (vro->op0)
>                                 - wi::to_poly_offset (vro->op1))
>                                * wi::to_offset (vro->op2)

Otherwise LGTM.

--- gcc/testsuite/c-c++-common/torture/pr117912-3.c.jj  2024-12-09 
12:04:07.892204122 +0100
+++ gcc/testsuite/c-c++-common/torture/pr117912-3.c     2024-12-09 
12:10:25.309936271 +0100
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-additional-options "-std=gnu++20" { target c++ } } */
+
+struct B {};
+struct A { int a;
+#ifdef __cplusplus
+          [[no_unique_address]]
+#endif
+          struct B b;
+          char c[]; };
+volatile void *p;
+
+void __attribute__((noipa))
+bar (void *q)
+{
+  p = q;
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+foo (struct A *p)
+{
+  bar (&p->b);
+  bar (&p->c);
+  return __builtin_object_size (&p->c, 1);
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+baz (void)
+{
+  struct A *p = (struct A *) __builtin_malloc (__builtin_offsetof (struct A, 
c) + 64);
+  bar (&p->b);
+  bar (&p->c);
+  return __builtin_object_size (&p->c, 1);
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+qux (struct A *p)
+{
+  bar (&p->b);
+  bar (&p->c);
+  return __builtin_object_size (&p->c, 3);
+}
+
+__SIZE_TYPE__ __attribute__((noipa))
+boo (void)
+{
+  struct A *p = (struct A *) __builtin_malloc (__builtin_offsetof (struct A, 
c) + 64);
+  bar (&p->b);
+  bar (&p->c);
+  return __builtin_object_size (&p->c, 3);
+}
+
+int
+main ()
+{
+  static struct A a = { .a = 1, .b = {}, .c = { 1, 2, 3, 4, 0 } };
+  if (foo (&a) < 5)
+    __builtin_abort ();
+  if (baz () < 64)
+    __builtin_abort ();
+}


        Jakub

Reply via email to