On Wed, Sep 4, 2013 at 5:20 PM, Richard Biener <rguent...@suse.de> wrote:
>
> The patch below moves IVOPTs out of the GIMPLE loop pipeline,
> closer to RTL expansion.  That's done for multiple reasons.
>
> First, the loop passes that at the moment precede IVOPTs leave
> around IL that is in desperate need of basic re-optimization
> like CSE, constant propagation and DCE.  That puts extra load
> on IVOPTs and its cost model, increasing compile-time and
> possibly confusing it.
>
> Second, IVOPTs introduces lowered memory accesses that it
> expects to stay as is, likewise it produces auto-inc/dec
> sequences that it expects to stay as is until RTL expansion.
> Passes such as DOM can break this expectation and make the
> work done by IVOPTs a waste.
>
> I remember doing this exercise in the GCC 4.3 timeframe where
> benchmarking on x86_64 showed no gains or losses (but x86_64
> isn't very sensitive to IV choices).
>
> Any help with benchmarking this on targets other than x86_64
> is appreciated (I'll re-do x86_64).
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> General comments are of course also welcome.
>
> Thanks,
> Richard.
>
> 2013-09-04  Richard Biener  <rguent...@suse.de>
>
>         * passes.def: Move IVOPTs before final DCE pass.
>         * tree-ssa-loop.c (tree_ssa_loop_ivopts): Adjust for being
>         outside of the loop pipeline.
>
>         * gcc.dg/tree-ssa/ivopts-3.c: Scan non-details dump.
>         * gcc.dg/tree-ssa/reassoc-19.c: Be more permissive.
>
> Index: gcc/passes.def
> ===================================================================
> *** gcc/passes.def.orig 2013-09-04 10:57:33.000000000 +0200
> --- gcc/passes.def      2013-09-04 11:11:27.535952665 +0200
> *************** along with GCC; see the file COPYING3.
> *** 221,227 ****
>           NEXT_PASS (pass_complete_unroll);
>           NEXT_PASS (pass_slp_vectorize);
>           NEXT_PASS (pass_loop_prefetch);
> -         NEXT_PASS (pass_iv_optimize);
>           NEXT_PASS (pass_lim);
>           NEXT_PASS (pass_tree_loop_done);
>         POP_INSERT_PASSES ()
> --- 221,226 ----
> *************** along with GCC; see the file COPYING3.
> *** 237,242 ****
> --- 236,246 ----
>          opportunities.  */
>         NEXT_PASS (pass_phi_only_cprop);
>         NEXT_PASS (pass_vrp);
> +       /* IVOPTs lowers memory accesses and exposes auto-inc/dec
> +          opportunities.  Run it after the above passes cleaned up
> +        the loop optimized IL but before DCE as IVOPTs generates
> +        quite some garbage.  */
> +       NEXT_PASS (pass_iv_optimize);
>         NEXT_PASS (pass_cd_dce);
>         NEXT_PASS (pass_tracer);
>
> Index: gcc/tree-ssa-loop.c
> ===================================================================
> *** gcc/tree-ssa-loop.c.orig    2013-09-04 10:57:32.000000000 +0200
> --- gcc/tree-ssa-loop.c 2013-09-04 11:11:27.536952677 +0200
> *************** make_pass_loop_prefetch (gcc::context *c
> *** 906,915 ****
>   static unsigned int
>   tree_ssa_loop_ivopts (void)
>   {
> !   if (number_of_loops (cfun) <= 1)
> !     return 0;
>
> -   tree_ssa_iv_optimize ();
>     return 0;
>   }
>
> --- 906,924 ----
>   static unsigned int
>   tree_ssa_loop_ivopts (void)
>   {
> !   loop_optimizer_init (LOOPS_NORMAL
> !                      | LOOPS_HAVE_RECORDED_EXITS);
> !
> !   if (number_of_loops (cfun) > 1)
> !     {
> !       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
> !       scev_initialize ();
> !       tree_ssa_iv_optimize ();
> !       scev_finalize ();
> !     }
> !
> !   loop_optimizer_finalize ();
>
>     return 0;
>   }
>
> Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c
> ===================================================================
> *** gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c.orig       2013-09-04 10:57:33.000000000 +0200
> --- gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c    2013-09-04 11:11:27.559952952 +0200
> ***************
> *** 1,5 ****
>   /* { dg-do compile } */
> ! /* { dg-options "-O2 -fdump-tree-ivopts-details" } */
>
>   void main (void)
>   {
> --- 1,5 ----
>   /* { dg-do compile } */
> ! /* { dg-options "-O2 -fdump-tree-ivopts" } */
>
>   void main (void)
>   {
> *************** void main (void)
> *** 8,12 ****
>       f2 ();
>   }
>
> ! /* { dg-final { scan-tree-dump-times "!= 0" 5 "ivopts" } }  */
>   /* { dg-final { cleanup-tree-dump "ivopts" } }  */
> --- 8,12 ----
>       f2 ();
>   }
>
> ! /* { dg-final { scan-tree-dump-times "!= 0" 1 "ivopts" } }  */
>   /* { dg-final { cleanup-tree-dump "ivopts" } }  */
> Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-19.c
> ===================================================================
> *** gcc/testsuite/gcc.dg/tree-ssa/reassoc-19.c.orig     2012-12-18 14:24:58.000000000 +0100
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-19.c  2013-09-04 11:13:30.895416700 +0200
> *************** void foo(char* left, char* rite, int ele
> *** 16,22 ****
>     }
>   }
>
> ! /* { dg-final { scan-tree-dump-times "= \\\(sizetype\\\) element" 1 "optimized" } } */
>   /* { dg-final { scan-tree-dump-times "= -" 1 "optimized" } } */
>   /* { dg-final { scan-tree-dump-times " \\\+ " 1 "optimized" } } */
>   /* { dg-final { cleanup-tree-dump "optimized" } } */
> --- 16,22 ----
>     }
>   }
>
> ! /* { dg-final { scan-tree-dump-times "= \\\(\[^)\]*\\\) element" 1 "optimized" } } */
>   /* { dg-final { scan-tree-dump-times "= -" 1 "optimized" } } */
>   /* { dg-final { scan-tree-dump-times " \\\+ " 1 "optimized" } } */
>   /* { dg-final { cleanup-tree-dump "optimized" } } */

Hi,
The IVOPTs transformation depends heavily on the loop-invariant pass:
it generates some loop invariants while rewriting IV uses and relies
on the loop-invariant pass to hoist them out of the loop.  So the
position of the loop-invariant pass may matter too if we move IVOPTs.

-- 
Best Regards.

Reply via email to