On 08/09/16 09:29, Richard Biener wrote:
> On Mon, 8 Aug 2016, Bernd Edlinger wrote:
>
>> Hi!
>>
>>
>> As described in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71083
>> we generate unaligned accesses because tree-predcom rewrites
>> bitfield and packed accesses in a way that loses the proper
>> alignment information.
>>
>> The attached patch fixes this by re-using the gimple memory
>> expression from the original access.
>>
>> I was not sure whether any non-constant array references would be
>> valid where ref_at_iteration is expanded, so I set these
>> to the array lower bound, as the DR_OFFSET contains the folded
>> value of all non-constant array references.
>>
>> The patch compensates for the constant offset from the outer
>> reference to the inner reference, and replaces the inner
>> reference instead of the outer reference.
>>
>>
>> Boot-strapped and reg-tested on x86_64-linux-gnu.
>> OK for trunk?
>
> I don't think what you do is correct. Consider
>
>   for (i)
>     for (j)
>       {
>         ... = a[i][j];
>       }
>
> predcom considers innermost loops only and thus the DRs will
> be analyzed with respect to that, which means DR_BASE_ADDRESS
> is &a[i][0] but you end up generating (*(&a) + i * ..)[0][0]
> which is at best suspicious with respect to further analyses
> by data-ref and TBAA.  Also note that this may get the alignment
> wrong as well, as changing [i] to [0] may lead to false alignment
> (consider a [2][2] char array aligned to 4 bytes).
>
> Your patch goes halfway back to code we had in the past
> (looking at the 4.3 branch right now) which had numerous
> issues (don't remember exactly).
>
Hmm. Yes. These ARRAY_REFs ruin the whole idea :)
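Just to spell the false-alignment case out for myself (a hand-written
example, not taken from the PR):

/* char a[2][2] aligned to 4 bytes: the array itself starts at a 4-byte
   boundary, but row a[1] starts at offset 2 and is therefore only
   2-byte aligned.  Rewriting a[i] to a[0] would thus claim 4-byte
   alignment for an access that may only have 2.  */
#include <stdio.h>

static char a[2][2] __attribute__ ((aligned (4)));

int
main (void)
{
  printf ("a[0] = %p, a[1] = %p\n", (void *) a[0], (void *) a[1]);
  return 0;
}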
> I believe that if we'd handle bitfields by
>
>   if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
>       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (DR_REF (dr), 1)))
>     ref = TREE_OPERAND (DR_REF (dr), 0);
>   else
>     ref = DR_REF (dr);
>   unsigned align = get_object_alignment (ref);
>
> and use the type / alignment of that ref for the built MEM_REF
> (with coff adjusted by the split bitfield component-ref offset)
> and build a COMPONENT_REF around the MEM_REF to handle the
> bitfield part, this should work ok.
>
Ooookay.  I think your idea works in principle.
Attached is a new version of the patch; I hope it did not become
too ugly.
I think from Eric's comment in get_bit_range it can be possible
in Ada that the outer object is accidentally byte-aligned
but the inner reference is not.  In that case I think it is
better to generate a BIT_FIELD_REF instead of a COMPONENT_REF.
Eric, do you agree?  Are there any Ada test cases where the
pcom optimization kicks in?
We prefer the COMPONENT_REF just because it is likely more
aligned than the bit field itself.  The MEM_REF should be
correctly aligned in both cases.
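To illustrate which path the new code takes (my reading of the
patch, not compiler output), using the struct from the attached
test case:

struct lock_chain {
  unsigned int irq_context: 2, /* bit offset 0 -> byte-aligned, COMPONENT_REF path */
               depth: 6,       /* bit offset 2 -> mid-byte, BIT_FIELD_REF path */
               base: 24;       /* bit offset 8 -> byte-aligned, COMPONENT_REF path */
};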
I was not sure if I should pass the TREE_OPERAND (ref, 2) to
the COMPONENT_REF constructor.  Is that used at all?  All other
places where COMPONENT_REFs are built seem to pass NULL there.
Bootstrapped on x86_64-linux-gnu; reg-test is still running.
Is it OK if there are no regressions?
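(For reference: the rewritten references can be inspected in the
pcom dump, e.g. with -O3 -fdump-tree-pcom-details; predictive
commoning is enabled by default at -O3, so the torture test below
exercises it at that level.)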
Thanks
Bernd.
2016-08-08  Bernd Edlinger  <[email protected]>

	PR tree-optimization/71083
	* tree-predcom.c (ref_at_iteration): Correctly align the
	inner reference.

testsuite:
2016-08-08  Bernd Edlinger  <[email protected]>

	PR tree-optimization/71083
	* gcc.c-torture/execute/pr71083.c: New test.
Index: gcc/tree-predcom.c
===================================================================
--- gcc/tree-predcom.c (revision 239193)
+++ gcc/tree-predcom.c (working copy)
@@ -213,6 +213,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-scalar-evolution.h"
 #include "params.h"
 #include "tree-affine.h"
+#include "builtins.h"
 
 /* The maximum number of iterations between the considered memory
    references.  */
@@ -1364,11 +1365,16 @@ replace_ref_with (gimple *stmt, tree new_tree, boo
 
 /* Returns a memory reference to DR in the ITER-th iteration of
    the loop it was analyzed in.  Append init stmts to STMTS.  */
 
-static tree 
+static tree
 ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
 {
   tree off = DR_OFFSET (dr);
   tree coff = DR_INIT (dr);
+  tree ref = DR_REF (dr);
+  enum tree_code ref_code = ERROR_MARK;
+  tree ref_type = NULL_TREE;
+  tree ref_op1 = NULL_TREE;
+  tree ref_op2 = NULL_TREE;
   if (iter == 0)
     ;
   else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
@@ -1377,27 +1383,43 @@ ref_at_iteration (data_reference_p dr, int iter, g
   else
     off = size_binop (PLUS_EXPR, off,
                       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
+  if (TREE_CODE (ref) == COMPONENT_REF
+      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
+    {
+      unsigned HOST_WIDE_INT boff;
+      tree field = TREE_OPERAND (ref, 1);
+      ref_type = TREE_TYPE (ref);
+      boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
+      /* This can occur in Ada.  See Eric's comment in get_bit_range.  */
+      if ((boff % BITS_PER_UNIT) != 0)
+        {
+          ref_code = BIT_FIELD_REF;
+          ref_op1 = DECL_SIZE (field);
+          ref_op2 = bitsize_zero_node;
+        }
+      else
+        {
+          boff >>= LOG2_BITS_PER_UNIT;
+          boff += tree_to_uhwi (component_ref_field_offset (ref));
+          coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
+          ref_code = COMPONENT_REF;
+          ref_op1 = field;
+          ref_op2 = TREE_OPERAND (ref, 2);
+          ref = TREE_OPERAND (ref, 0);
+        }
+    }
   tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
   addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
                                  is_gimple_mem_ref_addr, NULL_TREE);
-  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
+  tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
+  tree type = build_aligned_type (TREE_TYPE (ref),
+                                  get_object_alignment (ref));
+  ref = build2 (MEM_REF, type, addr, alias_ptr);
   /* While data-ref analysis punts on bit offsets it still handles
-     bitfield accesses at byte boundaries.  Cope with that.  Note that
-     we cannot simply re-apply the outer COMPONENT_REF because the
-     byte-granular portion of it is already applied via DR_INIT and
-     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
-     start at offset zero.  */
-  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
-      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
-    {
-      tree field = TREE_OPERAND (DR_REF (dr), 1);
-      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
-                     build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
-                             addr, alias_ptr),
-                     DECL_SIZE (field), bitsize_zero_node);
-    }
-  else
-    return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
+     bitfield accesses at byte boundaries.  Cope with that.  */
+  if (ref_type)
+    ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
+  return ref;
 }
 
 /* Get the initialization expression for the INDEX-th temporary variable
/* Get the initialization expression for the INDEX-th temporary variable
Index: gcc/testsuite/gcc.c-torture/execute/pr71083.c
===================================================================
--- gcc/testsuite/gcc.c-torture/execute/pr71083.c (revision 0)
+++ gcc/testsuite/gcc.c-torture/execute/pr71083.c (working copy)
@@ -0,0 +1,43 @@
+struct lock_chain {
+  unsigned int irq_context: 2,
+    depth: 6,
+    base: 24;
+};
+
+__attribute__((noinline, noclone))
+struct lock_chain * foo (struct lock_chain *chain)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+    {
+      chain[i+1].base = chain[i].base;
+    }
+  return chain;
+}
+
+struct lock_chain1 {
+  char x;
+  unsigned short base;
+} __attribute__((packed));
+
+__attribute__((noinline, noclone))
+struct lock_chain1 * bar (struct lock_chain1 *chain)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+    {
+      chain[i+1].base = chain[i].base;
+    }
+  return chain;
+}
+
+struct lock_chain test [101];
+struct lock_chain1 test1 [101];
+
+int
+main ()
+{
+  foo (test);
+  bar (test1);
+  return 0;
+}