Hi!
As described in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71083 we
generate unaligned accesses because tree-predcom rewrites bitfield and
packed accesses in a way that loses the proper alignment information.
The attached patch fixes this by re-using the gimple memory expression
from the original access.

Because I was not sure whether non-constant array references would
still be valid at the point where ref_at_iteration is expanded, I set
these to the array's lower bound; DR_OFFSET already contains the folded
value of all non-constant array references.  The patch compensates for
the constant offset from the outer reference to the inner reference,
and replaces the inner reference instead of the outer one.
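To illustrate, here is a rough source-level sketch of the
predictive-commoning rewrite of the second loop in the new test case
(the function name bar_sketch and the comments are mine, the struct is
from the test case below).  The point is that the store emitted for
chain[i+1].base must keep the packed, byte-aligned access type,
otherwise it faults on strict-alignment targets:

/* Illustrative only: roughly what predcom makes of bar() in
   pr71083.c.  */
struct lock_chain1 {
  char x;
  unsigned short base;     /* at byte offset 1, so only byte-aligned */
} __attribute__((packed));

struct lock_chain1 test1[101];

void bar_sketch (struct lock_chain1 *chain)
{
  /* The load of chain[i].base always reads the value stored in the
     previous iteration, so predcom carries it in a scalar.  */
  unsigned short t = chain[0].base;
  for (int i = 0; i < 100; i++)
    chain[i + 1].base = t;  /* must stay an align-1 store */
}

int main ()
{
  bar_sketch (test1);
  return 0;
}

Bootstrapped and reg-tested on x86_64-linux-gnu.
OK for trunk?

Thanks
Bernd.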
2016-08-08  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	PR tree-optimization/71083
	* tree-predcom.c (ref_at_iteration): Rewrite the inner reference.

testsuite:
2016-08-08  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	PR tree-optimization/71083
	* gcc.c-torture/execute/pr71083.c: New test.
Index: gcc/tree-predcom.c
===================================================================
--- gcc/tree-predcom.c	(revision 239193)
+++ gcc/tree-predcom.c	(working copy)
@@ -1364,11 +1364,33 @@ replace_ref_with (gimple *stmt, tree new_tree, boo
 /* Returns a memory reference to DR in the ITER-th iteration of
    the loop it was analyzed in.  Append init stmts to STMTS.  */
 
-static tree 
+static tree
 ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
 {
   tree off = DR_OFFSET (dr);
   tree coff = DR_INIT (dr);
+  tree ref = unshare_expr (DR_REF (dr));
+  tree *exp = &ref;
+  while (handled_component_p (*exp))
+    {
+      switch (TREE_CODE (*exp))
+        {
+        case ARRAY_REF:
+        case ARRAY_RANGE_REF:
+          if (!cst_and_fits_in_hwi (TREE_OPERAND (*exp, 1)))
+            TREE_OPERAND (*exp, 1) = array_ref_low_bound (*exp);
+          break;
+        default:
+          break;
+        }
+      exp = &TREE_OPERAND (*exp, 0);
+    }
+  HOST_WIDE_INT bitsize, bitpos;
+  tree offset;
+  machine_mode mode;
+  int unsignedp, reversep, volatilep = 0;
+  get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+                       &unsignedp, &reversep, &volatilep);
   if (iter == 0)
     ;
   else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
@@ -1377,27 +1399,16 @@ ref_at_iteration (data_reference_p dr, int iter, g
   else
     off = size_binop (PLUS_EXPR, off,
                       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
+  if (offset)
+    off = size_binop (MINUS_EXPR, off, offset);
+  if (bitpos)
+    coff = size_binop (MINUS_EXPR, coff, ssize_int (bitpos / BITS_PER_UNIT));
   tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
   addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
                                  is_gimple_mem_ref_addr, NULL_TREE);
-  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
-  /* While data-ref analysis punts on bit offsets it still handles
-     bitfield accesses at byte boundaries.  Cope with that.  Note that
-     we cannot simply re-apply the outer COMPONENT_REF because the
-     byte-granular portion of it is already applied via DR_INIT and
-     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
-     start at offset zero.  */
-  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
-      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
-    {
-      tree field = TREE_OPERAND (DR_REF (dr), 1);
-      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
-                     build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
-                             addr, alias_ptr),
-                     DECL_SIZE (field), bitsize_zero_node);
-    }
-  else
-    return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
+  tree alias_ptr = fold_convert (reference_alias_ptr_type (*exp), coff);
+  *exp = fold_build2 (MEM_REF, TREE_TYPE (*exp), addr, alias_ptr);
+  return ref;
 }
 
 /* Get the initialization expression for the INDEX-th temporary variable
Index: gcc/testsuite/gcc.c-torture/execute/pr71083.c
===================================================================
--- gcc/testsuite/gcc.c-torture/execute/pr71083.c	(revision 0)
+++ gcc/testsuite/gcc.c-torture/execute/pr71083.c	(working copy)
@@ -0,0 +1,43 @@
+struct lock_chain {
+  unsigned int irq_context: 2,
+      depth: 6,
+      base: 24;
+};
+
+__attribute__((noinline, noclone))
+struct lock_chain * foo (struct lock_chain *chain)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+    {
+      chain[i+1].base = chain[i].base;
+    }
+  return chain;
+}
+
+struct lock_chain1 {
+  char x;
+  unsigned short base;
+} __attribute__((packed));
+
+__attribute__((noinline, noclone))
+struct lock_chain1 * bar (struct lock_chain1 *chain)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+    {
+      chain[i+1].base = chain[i].base;
+    }
+  return chain;
+}
+
+struct lock_chain test [101];
+struct lock_chain1 test1 [101];
+
+int
+main ()
+{
+  foo (test);
+  bar (test1);
+  return 0;
+}