Ping.

On Mon, 2011-10-24 at 08:38 -0500, William J. Schmidt wrote:
> OK, I've removed the pointer-arithmetic case from expand, to be handled
> later by straight-line strength reduction.  Here's the patch to deal
> with just the specific pattern of PR46556 (which will also eventually be
> handled by strength reduction, but not as quickly).
> 
> (FYI, I've been thinking through the strength reduction pass, and my
> plan is to stage in some of the easiest cases first, hopefully for 4.7,
> and gradually add the more complex pieces.  Explicit multiplies in the
> IL with known constants can be done pretty easily.  More complexity is
> added when the multiplier is a variable, when conditional increments are
> present, and when multiplies are hidden in addressing expressions.)
> 
> The present patch was bootstrapped and regression-tested on
> powerpc64-linux.  OK for trunk?
> 
> Thanks,
> Bill
> 
> 
> 2011-10-24  Bill Schmidt  <wschm...@linux.vnet.ibm.com>
> 
> gcc:
> 
>       PR rtl-optimization/46556
>       * expr.c (restructure_base_and_offset): New function.
>       (expand_expr_real_1): Replace result of get_inner_reference
>       with result of restructure_base_and_offset when applicable.
>       * Makefile.in (expr.o): Update dependencies.
> 
> gcc/testsuite:
> 
>       PR rtl-optimization/46556
>       * gcc.dg/tree-ssa-pr46556-1.c: New testcase.
>       * gcc.dg/tree-ssa-pr46556-2.c: Likewise.
>       * gcc.dg/tree-ssa-pr46556-3.c: Likewise.
> 
> 
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-1.c (revision 0)
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-rtl-expand" } */
> +
> +struct x
> +{
> +  int a[16];
> +  int b[16];
> +  int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> +  foo (p->a[n], p->c[n], p->b[n]);
> +}
> +
> +/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 2 "expand" } } */
> +/* { dg-final { scan-rtl-dump-times "const_int 128" 1 "expand" } } */
> +/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 1 
> "expand" } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-2.c (revision 0)
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-rtl-expand" } */
> +
> +struct x
> +{
> +  int a[16];
> +  int b[16];
> +  int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> +  foo (p->a[n], p->c[n], p->b[n]);
> +  if (n > 12)
> +    foo (p->a[n], p->c[n], p->b[n]);
> +  else if (n > 3)
> +    foo (p->b[n], p->a[n], p->c[n]);
> +}
> +
> +/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 6 "expand" } } */
> +/* { dg-final { scan-rtl-dump-times "const_int 128" 3 "expand" } } */
> +/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 3 
> "expand" } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr46556-3.c (revision 0)
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-rtl-expand" } */
> +struct x
> +{
> +  int a[16];
> +  int b[16];
> +  int c[16];
> +};
> +
> +extern void foo (int, int, int);
> +
> +void
> +f (struct x *p, unsigned int n)
> +{
> +  foo (p->a[n], p->c[n], p->b[n]);
> +  if (n > 3)
> +    {
> +      foo (p->a[n], p->c[n], p->b[n]);
> +      if (n > 12)
> +     foo (p->b[n], p->a[n], p->c[n]);
> +    }
> +}
> +
> +/* { dg-final { scan-rtl-dump-times "\\(mem/s:SI \\(plus:" 6 "expand" } } */
> +/* { dg-final { scan-rtl-dump-times "const_int 128" 3 "expand" } } */
> +/* { dg-final { scan-rtl-dump-times "const_int 64 \\\[0x40\\\]\\)\\) \\\[" 3 
> "expand" } } */
> +/* { dg-final { cleanup-rtl-dump "expand" } } */
> Index: gcc/expr.c
> ===================================================================
> --- gcc/expr.c        (revision 180378)
> +++ gcc/expr.c        (working copy)
> @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "ssaexpand.h"
>  #include "target-globals.h"
>  #include "params.h"
> +#include "tree-pretty-print.h"
> 
>  /* Decide whether a function's arguments should be processed
>     from first to last or from last to first.
> @@ -7648,7 +7649,66 @@ expand_constructor (tree exp, rtx target, enum exp
>    return target;
>  }
> 
> +/* Given BASE, OFFSET, and BITPOS derived from EXPR, determine whether
> +   there is a profitable opportunity to restructure address arithmetic
> +   within BASE and OFFSET.  If so, produce such a restructuring and
> +   return it.  */
> +/* TODO: This belongs more properly in a separate pass that performs
> +   general strength reduction on straight-line code.  Eventually move
> +   this there.  */
> 
> +static tree
> +restructure_base_and_offset (tree expr, tree base, tree offset,
> +                          HOST_WIDE_INT bitpos)
> +{
> +  tree mult_op0, mult_op1, t1, t2, offset_type, mult_expr, add_expr, mem_ref;
> +  double_int c, c1, c2, c3, c4;
> +
> +  /* Look for the following pattern:
> +
> +       base   = MEM_REF (T1, C1);
> +       offset = MULT_EXPR (PLUS_EXPR (T2, C2), C3)
> +       bitpos = C4 * BITS_PER_UNIT
> +
> +     If found, convert it to:
> +
> +       MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
> +                C1 + (C2 * C3) + C4)
> +   */
> +  if (!base
> +      || !offset
> +      || TREE_CODE (base) != MEM_REF
> +      || TREE_CODE (offset) != MULT_EXPR)
> +    return NULL_TREE;
> +
> +  mult_op0 = TREE_OPERAND (offset, 0);
> +  mult_op1 = TREE_OPERAND (offset, 1);
> +
> +  if (TREE_CODE (mult_op0) != PLUS_EXPR
> +      || TREE_CODE (mult_op1) != INTEGER_CST
> +      || TREE_CODE (TREE_OPERAND (mult_op0, 1)) != INTEGER_CST)
> +    return NULL_TREE;
> +
> +  t1 = TREE_OPERAND (base, 0);
> +  t2 = TREE_OPERAND (mult_op0, 0);
> +  c1 = mem_ref_offset (base);
> +  c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
> +  c3 = tree_to_double_int (mult_op1);
> +  c4 = uhwi_to_double_int (bitpos / BITS_PER_UNIT);
> +  c = double_int_add (double_int_add (c1, double_int_mul (c2, c3)), c4);
> +
> +  offset_type = TREE_TYPE (TREE_OPERAND (base, 1));
> +
> +  mult_expr = fold_build2 (MULT_EXPR, sizetype, t2, 
> +                        double_int_to_tree (sizetype, c3));
> +
> +  add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (t1), t1, mult_expr);
> +
> +  mem_ref = fold_build2 (MEM_REF, TREE_TYPE (expr), add_expr,
> +                      double_int_to_tree (offset_type, c));
> +  return mem_ref;
> +}
> +
>  /* expand_expr: generate code for computing expression EXP.
>     An rtx for the computed value is returned.  The value is never null.
>     In the case of a void EXP, const0_rtx is returned.
> @@ -9584,7 +9644,7 @@ expand_expr_real_1 (tree exp, rtx target, enum mac
>        {
>       enum machine_mode mode1, mode2;
>       HOST_WIDE_INT bitsize, bitpos;
> -     tree offset;
> +     tree offset, tem2;
>       int volatilep = 0, must_force_mem;
>       bool packedp = false;
>       tree tem = get_inner_reference (exp, &bitsize, &bitpos, &offset,
> @@ -9601,6 +9661,36 @@ expand_expr_real_1 (tree exp, rtx target, enum mac
>               && DECL_PACKED (TREE_OPERAND (exp, 1))))
>         packedp = true;
> 
> +     /* Attempt to restructure the base and offset to combine constants.
> +        Bitfield references should eventually be lowered prior to this
> +        point and are not dealt with.  Skip BLKmode aggregates as well.
> +        This generates dead code, so require -O2.  */
> +     if (optimize > 1
> +         && code != BIT_FIELD_REF
> +         && (code != COMPONENT_REF 
> +             || !DECL_BIT_FIELD (TREE_OPERAND (exp, 1)))
> +         && TYPE_MODE (TREE_TYPE (exp)) != BLKmode
> +         && bitpos % BITS_PER_UNIT == 0
> +         && ((tem2 = restructure_base_and_offset (exp, tem, offset, bitpos))
> +             != NULL_TREE))
> +       {
> +         copy_ref_info (tem2, exp);
> +         tem = tem2;
> +         /* The offset and bitpos were absorbed into the new MEM_REF, so 
> +            make sure we don't add them in again.  */
> +         offset = NULL_TREE;
> +         bitpos = 0;
> +
> +         if (dump_file && (dump_flags & TDF_DETAILS))
> +           {
> +             fprintf (dump_file, "\nRestructured inner reference:\n");
> +             fprintf (dump_file, "  Original reference: ");
> +             print_generic_expr (dump_file, exp, TDF_VOPS|TDF_MEMSYMS);
> +             fprintf (dump_file, "\n  Replacement: ");
> +             print_generic_expr (dump_file, tem, TDF_VOPS|TDF_MEMSYMS);
> +           }
> +       }
> +
>       /* If TEM's type is a union of variable size, pass TARGET to the inner
>          computation, since it will need a temporary and TARGET is known
>          to have to do.  This occurs in unchecked conversion in Ada.  */
> Index: gcc/Makefile.in
> ===================================================================
> --- gcc/Makefile.in   (revision 180378)
> +++ gcc/Makefile.in   (working copy)
> @@ -2926,7 +2926,7 @@ expr.o : expr.c $(CONFIG_H) $(SYSTEM_H) coretypes.
>     reload.h langhooks.h intl.h $(TM_P_H) $(TARGET_H) \
>     tree-iterator.h gt-expr.h $(MACHMODE_H) $(TIMEVAR_H) $(TREE_FLOW_H) \
>     $(TREE_PASS_H) $(DF_H) $(DIAGNOSTIC_H) vecprim.h $(SSAEXPAND_H) \
> -   $(PARAMS_H) $(COMMON_TARGET_H)
> +   $(PARAMS_H) $(COMMON_TARGET_H) tree-pretty-print.h
>  dojump.o : dojump.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) 
> $(TREE_H) \
>     $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) $(OPTABS_H) $(INSN_ATTR_H) 
> insn-config.h \
>     langhooks.h $(GGC_H) gt-dojump.h vecprim.h $(BASIC_BLOCK_H) output.h
> 
> 


Reply via email to