On Fri, 22 Sep 2017, Richard Biener wrote:
>
> This simplifies canonicalize_loop_closed_ssa and does other minimal
> TLC. It also adds a testcase I reduced from a stupid mistake I made
> when reworking canonicalize_loop_closed_ssa.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
>
> SPEC CPU 2006 is happy with it, current statistics on x86_64 with
> -Ofast -march=haswell -floop-nest-optimize are
>
> 61 loop nests "optimized"
> 45 loop nest transforms cancelled because of code generation issues
> 21 loop nest optimizations timed out the 350000 ISL "operations" we allow
Overall compile time (with -j6) is 695 sec. w/o -floop-nest-optimize
and 709 sec. with (this was with release checking).
A single-run has 416.gamess (580s -> 618s),
436.cactusADM (206s -> 182s), 437.leslie3d (228s ->218s),
450.soplex (229s -> 226s), 465.tonto (428s -> 425s), 401.bzip2 (383s ->
379s), 462.libquantum (352s -> 343s), ignoring +-2s changes. Will
do a 3-run for those to confirm (it would be only a single regression
for 416.gamess).
Sofar I'm positively surprised given the limitations (and inefficiencies)
I know.
I'll add some more opt-info stuff to assess the number of SCOPs we
detect but discard during further analysis and the number of transforms
we cancel because they turn out as a no-op.
Richard.
> {
> - if (dump_file && dump_flags)
> - fprintf (dump_file, "isl timed out --param max-isl-operations=%d\n",
> - max_operations);
> + location_t loc = find_loop_location
> + (scop->scop_info->region.entry->dest->loop_father);
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
> + "loop nest not optimized, optimization timed out "
> + "after %d operations [--param max-isl-operations]\n",
> + max_operations);
> return false;
> }
>
> Index: gcc/graphite.c
> ===================================================================
> --- gcc/graphite.c (revision 253091)
> +++ gcc/graphite.c (working copy)
> @@ -293,86 +293,6 @@ free_scops (vec<scop_p> scops)
> scops.release ();
> }
>
> -/* Returns true when P1 and P2 are close phis with the same
> - argument. */
> -
> -static inline bool
> -same_close_phi_node (gphi *p1, gphi *p2)
> -{
> - return (types_compatible_p (TREE_TYPE (gimple_phi_result (p1)),
> - TREE_TYPE (gimple_phi_result (p2)))
> - && operand_equal_p (gimple_phi_arg_def (p1, 0),
> - gimple_phi_arg_def (p2, 0), 0));
> -}
> -
> -static void make_close_phi_nodes_unique (basic_block bb);
> -
> -/* Remove the close phi node at GSI and replace its rhs with the rhs
> - of PHI. */
> -
> -static void
> -remove_duplicate_close_phi (gphi *phi, gphi_iterator *gsi)
> -{
> - gimple *use_stmt;
> - use_operand_p use_p;
> - imm_use_iterator imm_iter;
> - tree res = gimple_phi_result (phi);
> - tree def = gimple_phi_result (gsi->phi ());
> -
> - gcc_assert (same_close_phi_node (phi, gsi->phi ()));
> -
> - FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
> - {
> - FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
> - SET_USE (use_p, res);
> -
> - update_stmt (use_stmt);
> -
> - /* It is possible that we just created a duplicate close-phi
> - for an already-processed containing loop. Check for this
> - case and clean it up. */
> - if (gimple_code (use_stmt) == GIMPLE_PHI
> - && gimple_phi_num_args (use_stmt) == 1)
> - make_close_phi_nodes_unique (gimple_bb (use_stmt));
> - }
> -
> - remove_phi_node (gsi, true);
> -}
> -
> -/* Removes all the close phi duplicates from BB. */
> -
> -static void
> -make_close_phi_nodes_unique (basic_block bb)
> -{
> - gphi_iterator psi;
> -
> - for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> - {
> - gphi_iterator gsi = psi;
> - gphi *phi = psi.phi ();
> -
> - /* At this point, PHI should be a close phi in normal form. */
> - gcc_assert (gimple_phi_num_args (phi) == 1);
> -
> - /* Iterate over the next phis and remove duplicates. */
> - gsi_next (&gsi);
> - while (!gsi_end_p (gsi))
> - if (same_close_phi_node (phi, gsi.phi ()))
> - remove_duplicate_close_phi (phi, &gsi);
> - else
> - gsi_next (&gsi);
> - }
> -}
> -
> -/* Return true when NAME is defined in LOOP. */
> -
> -static bool
> -defined_in_loop_p (tree name, loop_p loop)
> -{
> - gcc_assert (TREE_CODE (name) == SSA_NAME);
> - return loop == loop_containing_stmt (SSA_NAME_DEF_STMT (name));
> -}
> -
> /* Transforms LOOP to the canonical loop closed SSA form. */
>
> static void
> @@ -380,20 +300,22 @@ canonicalize_loop_closed_ssa (loop_p loo
> {
> edge e = single_exit (loop);
> basic_block bb;
> + gphi_iterator psi;
>
> if (!e || (e->flags & EDGE_COMPLEX))
> return;
>
> bb = e->dest;
>
> + /* Make the loop-close PHI node BB contain only PHIs and have a
> + single predecessor. */
> if (single_pred_p (bb))
> {
> e = split_block_after_labels (bb);
> - make_close_phi_nodes_unique (e->src);
> + bb = e->src;
> }
> else
> {
> - gphi_iterator psi;
> basic_block close = split_edge (e);
> e = single_succ_edge (close);
> for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> @@ -404,7 +326,7 @@ canonicalize_loop_closed_ssa (loop_p loo
>
> /* Only add close phi nodes for SSA_NAMEs defined in LOOP. */
> if (TREE_CODE (arg) != SSA_NAME
> - || !defined_in_loop_p (arg, loop))
> + || loop_containing_stmt (SSA_NAME_DEF_STMT (arg)) != loop)
> continue;
>
> tree res = copy_ssa_name (arg);
> @@ -413,8 +335,30 @@ canonicalize_loop_closed_ssa (loop_p loo
> UNKNOWN_LOCATION);
> SET_USE (use_p, res);
> }
> + bb = close;
> + }
>
> - make_close_phi_nodes_unique (close);
> + /* Eliminate duplicates. This relies on processing loops from
> + innermost to outer. */
> + for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
> + {
> + gphi_iterator gsi = psi;
> + gphi *phi = psi.phi ();
> +
> + /* At this point, PHI should be a close phi in normal form. */
> + gcc_assert (gimple_phi_num_args (phi) == 1);
> +
> + /* Iterate over the next phis and remove duplicates. */
> + gsi_next (&gsi);
> + while (!gsi_end_p (gsi))
> + if (gimple_phi_arg_def (phi, 0) == gimple_phi_arg_def (gsi.phi (), 0))
> + {
> + replace_uses_by (gimple_phi_result (gsi.phi ()),
> + gimple_phi_result (phi));
> + remove_phi_node (&gsi, true);
> + }
> + else
> + gsi_next (&gsi);
> }
> }
>
> @@ -443,7 +387,7 @@ static void
> canonicalize_loop_closed_ssa_form (void)
> {
> loop_p loop;
> - FOR_EACH_LOOP (loop, 0)
> + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
> canonicalize_loop_closed_ssa (loop);
>
> checking_verify_loop_closed_ssa (true);
> @@ -509,6 +453,19 @@ graphite_transform_loops (void)
> "loop nest optimized\n");
> }
>
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + {
> + loop_p loop;
> + int num_no_dependency = 0;
> +
> + FOR_EACH_LOOP (loop, 0)
> + if (loop->can_be_parallel)
> + num_no_dependency++;
> +
> + fprintf (dump_file, "%d loops carried no dependency.\n",
> + num_no_dependency);
> + }
> +
> free_scops (scops);
> graphite_finalize (need_cfg_cleanup_p);
> the_isl_ctx = NULL;
> Index: gcc/testsuite/gcc.dg/graphite/scop-24.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/graphite/scop-24.c (nonexistent)
> +++ gcc/testsuite/gcc.dg/graphite/scop-24.c (working copy)
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -floop-nest-optimize" } */
> +
> +typedef struct _IO_FILE FILE;
> +extern struct _IO_FILE *stderr;
> +typedef float real;
> +typedef real rvec[3];
> +int rgbset (int);
> +void ps_box (int, int);
> +void plot_phi(char *fn,rvec box,int natoms,rvec x[],real phi[])
> +{
> + real phi_max,rr,gg,bb,fac,dx,x0,y0;
> + int i;
> + for(i=0; (i<natoms); i++)
> + phi_max=((phi_max > __builtin_fabs(phi[i]))
> + ? phi_max : __builtin_fabs(phi[i]));
> + if (__builtin_fabs(phi_max)<1.2e-38)
> + __builtin_fprintf(stderr, "X");
> + ps_box((real)(fac*box[0]-1),(real)(fac*box[1]-1));
> + for(i=0; (i<natoms); i++)
> + {
> + rr=gg=bb=1.0;
> + if (phi[i] < 0)
> + gg=bb=(1.0+(phi[i]/phi_max));
> + else
> + rr=gg=(1.0-(phi[i]/phi_max));
> + rr=rgbset(rr);
> + }
> +}
>
--
Richard Biener <[email protected]>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB
21284 (AG Nuernberg)