On 08/10/16 14:29, Richard Biener wrote: > On Tue, 9 Aug 2016, Bernd Edlinger wrote: >> We know that the difference between the innermost ref >> and the outer ref is byte-aligned, but we do not know >> that the same is true for offset between the COMPONENT_REF >> and the outer ref. >> >> I mean boff is essentially the difference between >> bitpos of get_inner_reference(exp) and >> bitpos of get_inner_reference(TREE_OPERAND (exp, 0)) >> >> This would be exposed by my patch, because previously >> we always generated BIT_FIELD_REFS, with bit-offset 0, >> but the MEM_REF at the byte-offset there is in all likelihood >> pretty much unaligned, the MEM_REF at the COMPONENT_REF >> is likely more aligned and allows better code for ARM processors, >> but only if the MEM_REF is at a byte-aligned offset at all, >> otherwise the whole transformation would be wrong. > > Note that the important thing to ensure is that the access the > MEM_REF performs is correct TBAA-wise which means you either > have to use alias-set zero (ptr_type_node offset) or _not_ > shuffle the offset arbitrarily between the MEM_REF and the > components you wrap it in. > > Richard. >
Yes, the patch exactly replicates the outermost COMPONENT_REF and subtracts the component's byte-offset from the MEM_REF's address, and the MEM_REF uses the pointer type of the inner reference. In the case without bitfields and the Ada bitfields the patch changes nothing, except we build an aligned type out of TREE_TYPE (DR_REF (dr)) and get_object_alignment (DR_REF (dr)). In the case with a component_ref that is byte-aligned we subtract the component byte offset from the address before the MEM_REF is constructed. And the alias_ptr is of type reference_alias_ptr_type (TREE_OPERAND (DR_REF (dr), 0)) and again the alignment from get_object_alignment (TREE_OPERAND (DR_REF (dr), 0) so that should be exactly type-correct from TBAA's perspective. Attached a new version of the patch with an improved comment, and the new Ada test cases. Bootstrap and reg-test on x86_64-pc-linux-gnu without regression. Is it OK for trunk? Thanks Bernd.
2016-08-08 Bernd Edlinger <bernd.edlin...@hotmail.de> PR tree-optimization/71083 * tree-predcom.c (ref_at_iteration): Correctly align the inner reference. testsuite: 2016-08-08 Bernd Edlinger <bernd.edlin...@hotmail.de> PR tree-optimization/71083 * gcc.c-torture/execute/pr71083.c: New test. * gnat.dg/loop_optimization23.adb: New test. * gnat.dg/loop_optimization23_pkg.ads: New test. * gnat.dg/loop_optimization23_pkg.adb: New test.
Index: gcc/tree-predcom.c =================================================================== --- gcc/tree-predcom.c (revision 239193) +++ gcc/tree-predcom.c (working copy) @@ -213,6 +213,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-scalar-evolution.h" #include "params.h" #include "tree-affine.h" +#include "builtins.h" /* The maximum number of iterations between the considered memory references. */ @@ -1364,11 +1365,16 @@ replace_ref_with (gimple *stmt, tree new_tree, boo /* Returns a memory reference to DR in the ITER-th iteration of the loop it was analyzed in. Append init stmts to STMTS. */ -static tree +static tree ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts) { tree off = DR_OFFSET (dr); tree coff = DR_INIT (dr); + tree ref = DR_REF (dr); + enum tree_code ref_code = ERROR_MARK; + tree ref_type = NULL_TREE; + tree ref_op1 = NULL_TREE; + tree ref_op2 = NULL_TREE; if (iter == 0) ; else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST) @@ -1377,27 +1383,48 @@ ref_at_iteration (data_reference_p dr, int iter, g else off = size_binop (PLUS_EXPR, off, size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter))); - tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off); - addr = force_gimple_operand_1 (unshare_expr (addr), stmts, - is_gimple_mem_ref_addr, NULL_TREE); - tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff); /* While data-ref analysis punts on bit offsets it still handles bitfield accesses at byte boundaries. Cope with that. Note that - we cannot simply re-apply the outer COMPONENT_REF because the - byte-granular portion of it is already applied via DR_INIT and - DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits + if the bitfield object also starts at a byte-boundary we can simply + replicate the COMPONENT_REF, but we have to subtract the component's + byte-offset from the MEM_REF address first. + Otherwise we simply build a BIT_FIELD_REF knowing that the bits start at offset zero. */ - if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF - && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) + if (TREE_CODE (ref) == COMPONENT_REF + && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))) { - tree field = TREE_OPERAND (DR_REF (dr), 1); - return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)), - build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field), - addr, alias_ptr), - DECL_SIZE (field), bitsize_zero_node); + unsigned HOST_WIDE_INT boff; + tree field = TREE_OPERAND (ref, 1); + ref_type = TREE_TYPE (ref); + boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field)); + /* This can occur in Ada. See the comment in get_bit_range. */ + if (boff % BITS_PER_UNIT != 0) + { + ref_code = BIT_FIELD_REF; + ref_op1 = DECL_SIZE (field); + ref_op2 = bitsize_zero_node; + } + else + { + boff >>= LOG2_BITS_PER_UNIT; + boff += tree_to_uhwi (component_ref_field_offset (ref)); + coff = size_binop (MINUS_EXPR, coff, ssize_int (boff)); + ref_code = COMPONENT_REF; + ref_op1 = field; + ref_op2 = TREE_OPERAND (ref, 2); + ref = TREE_OPERAND (ref, 0); + } } - else - return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr); + tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off); + addr = force_gimple_operand_1 (unshare_expr (addr), stmts, + is_gimple_mem_ref_addr, NULL_TREE); + tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff); + tree type = build_aligned_type (TREE_TYPE (ref), + get_object_alignment (ref)); + ref = build2 (MEM_REF, type, addr, alias_ptr); + if (ref_type) + ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2); + return ref; } /* Get the initialization expression for the INDEX-th temporary variable Index: gcc/testsuite/gcc.c-torture/execute/pr71083.c =================================================================== --- gcc/testsuite/gcc.c-torture/execute/pr71083.c (revision 0) +++ gcc/testsuite/gcc.c-torture/execute/pr71083.c (working copy) @@ -0,0 +1,43 @@ +struct lock_chain { + unsigned int irq_context: 2, + depth: 6, + base: 24; +}; + +__attribute__((noinline, noclone)) +struct lock_chain * foo (struct lock_chain *chain) +{ + int i; + for (i = 0; i < 100; i++) + { + chain[i+1].base = chain[i].base; + } + return chain; +} + +struct lock_chain1 { + char x; + unsigned short base; +} __attribute__((packed)); + +__attribute__((noinline, noclone)) +struct lock_chain1 * bar (struct lock_chain1 *chain) +{ + int i; + for (i = 0; i < 100; i++) + { + chain[i+1].base = chain[i].base; + } + return chain; +} + +struct lock_chain test [101]; +struct lock_chain1 test1 [101]; + +int +main () +{ + foo (test); + bar (test1); + return 0; +} Index: gcc/testsuite/gnat.dg/loop_optimization23.adb =================================================================== --- gcc/testsuite/gnat.dg/loop_optimization23.adb (revision 0) +++ gcc/testsuite/gnat.dg/loop_optimization23.adb (working copy) @@ -0,0 +1,14 @@ +-- { dg-do run } +-- { dg-options "-O3" } +-- PR tree-optimization/71083 +with Loop_Optimization23_Pkg; +use Loop_Optimization23_Pkg; +procedure Loop_Optimization23 is + Test : ArrayOfStructB; +begin + Test (0).b.b := 9999; + Foo (Test); + if Test (100).b.b /= 9999 then + raise Program_Error; + end if; +end; Index: gcc/testsuite/gnat.dg/loop_optimization23_pkg.adb =================================================================== --- gcc/testsuite/gnat.dg/loop_optimization23_pkg.adb (revision 0) +++ gcc/testsuite/gnat.dg/loop_optimization23_pkg.adb (working copy) @@ -0,0 +1,11 @@ +-- { dg-do compile } +-- { dg-options "-O3" } +-- PR tree-optimization/71083 +package body Loop_Optimization23_Pkg is + procedure Foo (X : in out ArrayOfStructB) is + begin + for K in 0..99 loop + X (K+1).b.b := X (K).b.b; + end loop; + end Foo; +end Loop_Optimization23_Pkg; Index: gcc/testsuite/gnat.dg/loop_optimization23_pkg.ads =================================================================== --- gcc/testsuite/gnat.dg/loop_optimization23_pkg.ads (revision 0) +++ gcc/testsuite/gnat.dg/loop_optimization23_pkg.ads (working copy) @@ -0,0 +1,17 @@ +-- PR tree-optimization/71083 +package Loop_Optimization23_Pkg is + type Nibble is mod 2**4; + type Int24 is mod 2**24; + type StructA is record + a : Nibble; + b : Int24; + end record; + pragma Pack(StructA); + type StructB is record + a : Nibble; + b : StructA; + end record; + pragma Pack(StructB); + type ArrayOfStructB is array(0..100) of StructB; + procedure Foo (X : in out ArrayOfStructB); +end Loop_Optimization23_Pkg;