On Fri, Dec 6, 2013 at 6:19 PM, Richard Biener
<[email protected]> wrote:
> On Mon, Nov 25, 2013 at 7:41 PM, Jeff Law <[email protected]> wrote:
>> On 11/25/13 02:22, bin.cheng wrote:
>>>
>>> Hi,
>>> I previously committed two patches lowering complex address expression for
>>> IVOPT at http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00546.html and
>>> http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01103.html
>>> When I bootstrapping GCC I found there were some peculiar cases like
>>> &MEM[ptr+CST] + xxxx, which should be handled too. This patch consists
>>> below two changes:
>>>
>>> 1) change in alloc_iv:
>>> Original code lowers top level complex address expressions like
>>> &MEM[ptr+off]. The patch relaxes check condition in order to lower
>>> expressions like &MEM[ptr+off] + xxx, just as the BASE from below dump:
>>> use 2
>>> generic
>>> in statement _595 = &MEM[(void *)&this_prg + 36B] + _594;
>>>
>>> at position
>>> type struct gcov_bucket_type *
>>> base (struct gcov_bucket_type *) &MEM[(void *)&this_prg + 36B] +
>>> (sizetype) ((unsigned int) (src_i_683 + -1) * 20)
>>> step 4294967276
>>> base object (void *) &this_prg
>>> related candidates
>>>
>>> 2) change in tree_to_aff_combination:
>>> The function get_inner_reference returns "&MEM[ptr+off]" as the core for
>>> input like the memory ADDRESS in below dump:
>>> use 2
>>> address
>>> in statement _59 = MEM[(const struct gcov_ctr_summary *)summary_22(D) +
>>> 4B].histogram[h_ix_111].min_value;
>>>
>>> at position MEM[(const struct gcov_ctr_summary *)summary_22(D) +
>>> 4B].histogram[h_ix_111].min_value
>>> type const gcov_type *
>>> base (const gcov_type *) &MEM[(const struct gcov_ctr_summary
>>> *)summary_22(D) + 4B] + 36
>>> step 20
>>> base object (void *) summary_22(D)
>>> related candidates
>>>
>>> Which can be further reduced into something like "summary_22(D) + 40B".
>>> This change is necessary for the first one, because I am using
>>> tree_to_aff_combination rather than get_inner_reference_aff now.
>>>
>>> Bootstrap and test on x86/x86_64/arm. Is it OK?
>>>
>>> Thanks.
>>> bin
>>>
>>> 2013-11-25 Bin Cheng <[email protected]>
>>>
>>> * tree-ssa-loop-ivopts.c (contain_complex_addr_expr): New.
>>> (alloc_iv): Lower more cases by calling contain_complex_addr_expr
>>> and tree_to_aff_combination.
>>> * tree-affine.c (tree_to_aff_combination): Handle &MEM[ptr+CST]
>>> in core part of complex reference.
>>>
>>> gcc/testsuite/ChangeLog
>>> 2013-11-25 Bin Cheng <[email protected]>
>>>
>>> * gcc.dg/tree-ssa/ivopts-lower_base.c: New test.
>>
>> Unless there's a PR for this problem, I think this needs to wait.
>
> I agree. Btw, please split the patch.
>
> Index: gcc/tree-affine.c
> ===================================================================
> --- gcc/tree-affine.c (revision 205087)
> +++ gcc/tree-affine.c (working copy)
> @@ -328,7 +328,19 @@ tree_to_aff_combination (tree expr, tree type, aff
> double_int::from_uhwi (bitpos / BITS_PER_UNIT));
> core = build_fold_addr_expr (core);
> if (TREE_CODE (core) == ADDR_EXPR)
> - aff_combination_add_elt (comb, core, double_int_one);
> + {
> + /* Handle &MEM[ptr + CST] in core part of complex reference. */
> + if (TREE_CODE (TREE_OPERAND (core, 0)) == MEM_REF)
> + {
> + core = TREE_OPERAND (core, 0);
> + tree_to_aff_combination (TREE_OPERAND (core, 0), type, &tmp);
> + aff_combination_add (comb, &tmp);
> + tree_to_aff_combination (TREE_OPERAND (core, 1), sizetype, &tmp);
> + aff_combination_add (comb, &tmp);
> + }
> + else
> + aff_combination_add_elt (comb, core, double_int_one);
> + }
> else
> {
> tree_to_aff_combination (core, type, &tmp)
>
> please handle the offset before taking the address, thus
>
> if (TREE_CODE (core) == MEM_REF)
> {
> add constant offset;
> core = TREE_OPERAND (core, 0);
> }
> else
> core = build_fold_addr_expr (core);
>
> that simplifies things and avoids the address building.
>
Hi,
I split the patch into two and updated the test case.
The patches pass bootstrap/tests on x86/x86_64, also pass test on arm cortex-m3.
Is it OK?
Thanks,
bin
PATCH 1:
2014-05-06 Bin Cheng <[email protected]>
* gcc/tree-affine.c (tree_to_aff_combination): Handle MEM_REF for
core part of address expressions.
PATCH 2:
2014-05-06 Bin Cheng <[email protected]>
* gcc/tree-ssa-loop-ivopts.c (contain_complex_addr_expr): New.
(alloc_iv): Lower base expressions containing ADDR_EXPR.
gcc/testsuite/ChangeLog
2014-05-06 Bin Cheng <[email protected]>
* gcc.dg/tree-ssa/ivopts-lower_base.c: New test.
--
Best Regards.
Index: gcc/tree-affine.c
===================================================================
--- gcc/tree-affine.c (revision 210047)
+++ gcc/tree-affine.c (working copy)
@@ -331,7 +331,15 @@ tree_to_aff_combination (tree expr, tree type, aff
break;
aff_combination_const (comb, type,
double_int::from_uhwi (bitpos / BITS_PER_UNIT));
- core = build_fold_addr_expr (core);
+ if (TREE_CODE (core) == MEM_REF)
+ {
+ tree_to_aff_combination (TREE_OPERAND (core, 1), sizetype, &tmp);
+ aff_combination_add (comb, &tmp);
+ core = TREE_OPERAND (core, 0);
+ }
+ else
+ core = build_fold_addr_expr (core);
+
if (TREE_CODE (core) == ADDR_EXPR)
aff_combination_add_elt (comb, core, double_int_one);
else
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c (revision 0)
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+#include <string.h>
+#include <stdlib.h>
+
+#define MAX_NUM (256)
+
+void
+sort_pointers (size_t n, void **pointers, void **work)
+{
+ typedef unsigned char digit_t;
+ unsigned int count[MAX_NUM];
+ int big_endian_p;
+ size_t i;
+ size_t j;
+
+ if ((sizeof (void *) / sizeof (digit_t)) % 2 != 0)
+ abort ();
+
+ for (i = 0, j = 0; i < sizeof (size_t); ++i)
+ {
+ j *= MAX_NUM;
+ j += i;
+ }
+
+ big_endian_p = (((char *)&j)[0] == 0);
+ for (i = 0; i < sizeof (void *) / sizeof (digit_t); ++i)
+ {
+ digit_t *digit;
+ digit_t *bias;
+ digit_t *top;
+ unsigned int *countp;
+ void **pointerp;
+
+ if (big_endian_p)
+ j = sizeof (void *) / sizeof (digit_t) - i;
+ else
+ j = i;
+
+ memset (count, 0, MAX_NUM * sizeof (unsigned int));
+ bias = ((digit_t *) pointers) + j;
+ top = ((digit_t *) (pointers + n)) + j;
+ for (digit = bias;
+ digit < top;
+ digit += sizeof (void *) / sizeof (digit_t))
+ ++count[*digit];
+
+ for (countp = count + 1; countp < count + MAX_NUM; ++countp)
+ *countp += countp[-1];
+
+ for (pointerp = pointers + n - 1; pointerp >= pointers; --pointerp)
+ work[--count[((digit_t *) pointerp)[j]]] = *pointerp;
+
+ pointerp = pointers;
+ pointers = work;
+ work = pointerp;
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "base \[^\\n\]*&MEM\\\[" "ivopts" } } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c (revision 210047)
+++ gcc/tree-ssa-loop-ivopts.c (working copy)
@@ -928,36 +928,60 @@ determine_base_object (tree expr)
}
}
+/* Return true if address expression with non-DECL_P operand appears
+ in EXPR. */
+
+static bool
+contain_complex_addr_expr (tree expr)
+{
+ bool res = false;
+
+ STRIP_NOPS (expr);
+ switch (TREE_CODE (expr))
+ {
+ case POINTER_PLUS_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
+ res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
+ break;
+
+ case ADDR_EXPR:
+ return (!DECL_P (TREE_OPERAND (expr, 0)));
+
+ default:
+ return false;
+ }
+
+ return res;
+}
+
/* Allocates an induction variable with given initial value BASE and step STEP
for loop LOOP. */
static struct iv *
alloc_iv (tree base, tree step)
{
- tree base_object = base;
+ tree expr = base;
struct iv *iv = XCNEW (struct iv);
gcc_assert (step != NULL_TREE);
- /* Lower all address expressions except ones with DECL_P as operand.
+ /* Lower address expression in base except ones with DECL_P as operand.
By doing this:
1) More accurate cost can be computed for address expressions;
2) Duplicate candidates won't be created for bases in different
forms, like &a[0] and &a. */
- STRIP_NOPS (base_object);
- if (TREE_CODE (base_object) == ADDR_EXPR
- && !DECL_P (TREE_OPERAND (base_object, 0)))
+ STRIP_NOPS (expr);
+ if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
+ || contain_complex_addr_expr (expr))
{
aff_tree comb;
- double_int size;
- base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
- &comb, &size);
- gcc_assert (base_object != NULL_TREE);
- base_object = build_fold_addr_expr (base_object);
+ tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
}
iv->base = base;
- iv->base_object = determine_base_object (base_object);
+ iv->base_object = determine_base_object (base);
iv->step = step;
iv->biv_p = false;
iv->have_use_for = false;