Hi! As mentioned in the PR, save_expr seems to be very optimistic when some expression is invariant, which can result in various wrong-code issues. The problem is with the TREE_READONLY (t) && !TREE_SIDE_EFFECTS (t) case in tree_invariant_p_1. TREE_READONLY (t) in that case says that the object shouldn't be modified during its lifetime and !TREE_SIDE_EFFECTS (t) that it can be evaluated safely multiple times, but that doesn't mean we can avoid wrapping the expression into SAVE_EXPR say for a TREE_READONLY COMPONENT_REF with INDIRECT_REF as first operand - either the lifetime of the TREE_READONLY object could end earlier than when we need to reevaluate the object (that happens in the pr52339-1.c case where save_expr is called on p->a and then free (p) is done or pr52339.C where delete a->b when calling ~B () dtor deallocates a), or e.g. the pointer could change as in pr52339-2.c (so evaluating p->a again after ++p yields a possibly different value than originally and again we need a SAVE_EXPR).
Attached are two patches which fix this, unfortunately both regress FAIL: gnat.dg/loop_optimization21.adb scan-tree-dump-times optimized "Index_Check" 1 FAIL: gnat.dg/vect1.adb scan-tree-dump-times vect "vectorized 1 loops" 15 FAIL: gnat.dg/vect2.adb scan-tree-dump-times vect "vectorized 1 loops" 15 FAIL: gnat.dg/vect3.adb scan-tree-dump-times vect "vectorized 1 loops" 15 FAIL: gnat.dg/vect4.adb scan-tree-dump-times vect "vectorized 1 loops" 15 FAIL: gnat.dg/vect5.adb scan-tree-dump-times vect "vectorized 1 loops" 15 FAIL: gnat.dg/vect6.adb scan-tree-dump-times vect "vectorized 1 loops" 15 on x86_64-linux (the first scan triggers 2 times rather than once, the next 3 13 times rather than 15 and the last 3 14 times rather than 15 times). The first patch has been otherwise successfully bootstrapped/regtested on x86_64-linux and i686-linux (with that above regressions), the second one is probably better but has been so far tested just on the new testcases and verified to also cause the above Ada regressions. Ok for trunk (which version), what to do about the regressions, shall we just adjust the expected counts or something else? E.g. in the vect6.adb case it is the procedure Add (X : Varray; Y : Long_Float; R : out Varray) is begin for I in X'Range loop R(I) := X(I) + Y; end loop; end; function that is no longer vectorized. Both patches lead to lots of former r.P_BOUNDS->LB0 r.P_BOUNDS->UB0 x.P_BOUNDS->LB0 x.P_BOUNDS->UB0 expressions to be wrapped into SAVE_EXPRs. Jakub
2023-05-05 Jakub Jelinek <ja...@redhat.com> PR c++/52339 * tree.cc (tree_invariant_p_2): New function, copied from tree_invariant_p. (contains_indirect_refs): New function. (tree_invariant_p): Rewritten as wrapper around tree_invariant_p_2. Return false if expression contains INDIRECT_REFs or MEM_REFs which actually dereference some pointer. (save_expr): Use SAVE_EXPR if tree_invariant_p_1 expression contains INDIRECT_REFs or MEM_REfs which actually dereference some pointer. (skip_simple_arithmetic): Use tree_invariant_p_2 instead of tree_invariant_p. * g++.dg/opt/pr52339.C: New test. * gcc.c-torture/execute/pr52339-1.c: New test. * gcc.c-torture/execute/pr52339-2.c: New test. --- gcc/tree.cc.jj 2023-05-01 09:59:46.686293833 +0200 +++ gcc/tree.cc 2023-05-04 15:40:58.684762277 +0200 @@ -3920,13 +3920,43 @@ tree_invariant_p_1 (tree t) /* Return true if T is function-invariant. */ -bool -tree_invariant_p (tree t) +static bool +tree_invariant_p_2 (tree t) { tree inner = skip_simple_arithmetic (t); return tree_invariant_p_1 (inner); } +/* Return non-NULL if *TP is INDIRECT_REF or MEM_REF with first operand + other than address of a decl. */ + +static tree +contains_indirect_refs (tree *tp, int *, void *) +{ + tree t = *tp; + if (TREE_CODE (t) == INDIRECT_REF) + return t; + else if (TREE_CODE (t) == MEM_REF + && (TREE_CODE (TREE_OPERAND (t, 0)) != ADDR_EXPR + || !DECL_P (TREE_OPERAND (TREE_OPERAND (t, 0), 0)))) + return t; + else + return NULL_TREE; +} + +/* Return true if T is function-invariant. Return false for expressions + containing pointer/reference dereferences. */ + +bool +tree_invariant_p (tree t) +{ + if (!tree_invariant_p_2 (t)) + return false; + return (TREE_CONSTANT (t) + || !walk_tree_without_duplicates (&t, contains_indirect_refs, + NULL)); +} + /* Wrap a SAVE_EXPR around EXPR, if appropriate. Do this to any expression which may be used in more than one place, but must be evaluated only once. @@ -3963,7 +3993,13 @@ save_expr (tree expr) if (TREE_CODE (inner) == ERROR_MARK) return inner; - if (tree_invariant_p_1 (inner)) + if (tree_invariant_p_1 (inner) + && (TREE_CONSTANT (expr) + /* Use SAVE_EXPR if there are any pointer dereferences, evaluating + such expressions again and again might lead to use-after-free. + See PR52339. */ + || !walk_tree_without_duplicates (&expr, contains_indirect_refs, + NULL))) return expr; /* If INNER contains a PLACEHOLDER_EXPR, we must evaluate it each time, since @@ -4008,9 +4044,9 @@ skip_simple_arithmetic (tree expr) expr = TREE_OPERAND (expr, 0); else if (BINARY_CLASS_P (expr)) { - if (tree_invariant_p (TREE_OPERAND (expr, 1))) + if (tree_invariant_p_2 (TREE_OPERAND (expr, 1))) expr = TREE_OPERAND (expr, 0); - else if (tree_invariant_p (TREE_OPERAND (expr, 0))) + else if (tree_invariant_p_2 (TREE_OPERAND (expr, 0))) expr = TREE_OPERAND (expr, 1); else break; --- gcc/testsuite/g++.dg/opt/pr52339.C.jj 2023-05-04 15:23:20.459935705 +0200 +++ gcc/testsuite/g++.dg/opt/pr52339.C 2023-05-04 15:22:35.640578681 +0200 @@ -0,0 +1,19 @@ +// PR c++/52339 +// { dg-do run { target c++11 } } + + +struct B; +struct A { B *b; }; +struct B { + A *a; + B () : a(new A{this}) {} + ~B () { delete a; } +}; + +int +main () +{ + B *b = new B; + const A *a = b->a; + delete a->b; +} --- gcc/testsuite/gcc.c-torture/execute/pr52339-1.c.jj 2023-05-04 15:22:59.177241023 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr52339-1.c 2023-05-04 15:20:19.820527142 +0200 @@ -0,0 +1,29 @@ +/* PR c++/52339 */ + +struct S { int a; }; + +void +bar (int *p, struct S *q) +{ + __builtin_free (q); +} + +int +foo (const struct S *p, struct S *q) +{ + int b[p->a]; + bar (b, q); + return sizeof (b); +} + +int +main () +{ + struct S *p = __builtin_malloc (sizeof (struct S)); + if (!p) + return 0; + p->a = 42; + if (foo (p, p) != 42 * sizeof (int)) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.c-torture/execute/pr52339-2.c.jj 2022-11-21 10:04:00.210677046 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr52339-2.c 2023-05-04 19:34:08.581686806 +0200 @@ -0,0 +1,20 @@ +/* PR c++/52339 */ + +struct S { int a; }; + +int +foo (const struct S *p) +{ + int b[p->a]; + ++p; + return sizeof (b); +} + +int +main () +{ + struct S s[] = { { 42 }, { 43 } }; + if (foo (s) != 42 * sizeof (int)) + __builtin_abort (); + return 0; +}
2023-05-05 Jakub Jelinek <ja...@redhat.com> PR c++/52339 * tree.cc (tree_invariant_p_1): For TREE_READONLY (t) without side-effects, only return true if DECL_P (get_base_address (t)). * g++.dg/opt/pr52339.C: New test. * gcc.c-torture/execute/pr52339-1.c: New test. * gcc.c-torture/execute/pr52339-2.c: New test. --- gcc/tree.cc.jj 2023-05-01 09:59:46.686293833 +0200 +++ gcc/tree.cc 2023-05-05 10:19:19.061827355 +0200 @@ -3876,10 +3876,21 @@ tree_invariant_p_1 (tree t) { tree op; - if (TREE_CONSTANT (t) - || (TREE_READONLY (t) && !TREE_SIDE_EFFECTS (t))) + if (TREE_CONSTANT (t)) return true; + if (TREE_READONLY (t) && !TREE_SIDE_EFFECTS (t)) + { + /* Return true for const qualified vars, but for members or array + elements without side-effects return true only if the base + object is a decl. If the base is e.g. a pointer dereference, + what the pointer points to could be deallocated or the pointer + could be changed. See PR52339. */ + tree base = get_base_address (t); + if (DECL_P (base)) + return true; + } + switch (TREE_CODE (t)) { case SAVE_EXPR: --- gcc/testsuite/g++.dg/opt/pr52339.C.jj 2023-05-04 15:23:20.459935705 +0200 +++ gcc/testsuite/g++.dg/opt/pr52339.C 2023-05-04 15:22:35.640578681 +0200 @@ -0,0 +1,19 @@ +// PR c++/52339 +// { dg-do run { target c++11 } } + + +struct B; +struct A { B *b; }; +struct B { + A *a; + B () : a(new A{this}) {} + ~B () { delete a; } +}; + +int +main () +{ + B *b = new B; + const A *a = b->a; + delete a->b; +} --- gcc/testsuite/gcc.c-torture/execute/pr52339-1.c.jj 2023-05-04 15:22:59.177241023 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr52339-1.c 2023-05-04 15:20:19.820527142 +0200 @@ -0,0 +1,29 @@ +/* PR c++/52339 */ + +struct S { int a; }; + +void +bar (int *p, struct S *q) +{ + __builtin_free (q); +} + +int +foo (const struct S *p, struct S *q) +{ + int b[p->a]; + bar (b, q); + return sizeof (b); +} + +int +main () +{ + struct S *p = __builtin_malloc (sizeof (struct S)); + if (!p) + return 0; + p->a = 42; + if (foo (p, p) != 42 * sizeof (int)) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.c-torture/execute/pr52339-2.c.jj 2022-11-21 10:04:00.210677046 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr52339-2.c 2023-05-04 19:34:08.581686806 +0200 @@ -0,0 +1,20 @@ +/* PR c++/52339 */ + +struct S { int a; }; + +int +foo (const struct S *p) +{ + int b[p->a]; + ++p; + return sizeof (b); +} + +int +main () +{ + struct S s[] = { { 42 }, { 43 } }; + if (foo (s) != 42 * sizeof (int)) + __builtin_abort (); + return 0; +}