On Thu, 12 Sep 2013, Jakub Jelinek wrote: > Hi! > > This patch fixes PR58392 and some related issues I've discovered. > move_sese_region_to_fn wasn't remapping loop->simduid VAR_DECL, > and wasn't setting dest_cfun->has_{simduid,force_vect}_loops > when needed, the inliner wasn't copying/remapping simduid nor > force_vect (just conservatively setting cfun->has_{simduid,force_vect}_loops > if src_cfun had those set) and omp-low.c didn't handle outer var refs in > simd loops nested in non-parallel/task contexts well. > Tested on x86_64-linux. Ok for gomp-4_0-branch and trunk (just the part > without the testcase for now, because Aldy's backport of the stuff this > is fixing is already on the trunk)?
Ok. Thanks, Richard. > 2013-09-12 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/58392 > * tree-cfg.c (move_sese_region_to_fn): Rename loop variable > to avoid shadowing of outer loop variable. If > saved_cfun->has_simduid_loops or saved_cfun->has_force_vect_loops, > replace_by_duplicate_decl simduid of loops that have it set and > set dest_cfun->has_simduid_loops and/or > dest_cfun->has_force_vect_loops. > * omp-low.c (build_outer_var_ref): Call maybe_lookup_decl_in_outer_ctx > instead of maybe_lookup_decl. > * tree-inline.c (copy_loops): Change blocks_to_copy argument to id. > Use id->blocks_to_copy instead of blocks_to_copy. Adjust recursive > call. Copy over force_vect and copy and remap simduid. Set > cfun->has_simduid_loops and/or cfun->has_force_vect_loops. > (copy_cfg_body): Remove blocks_to_copy argument. Use > id->blocks_to_copy instead of blocks_to_copy. Adjust copy_loops > caller. Don't set cfun->has_simduid_loops and/or > cfun->has_force_vect_loops here. > (copy_body): Remove blocks_to_copy argument. Adjust copy_cfg_body > caller. > (expand_call_inline, tree_function_versioning): Adjust copy_body > callers. > > * testsuite/libgomp.c/pr58392.c: New test. > > --- gcc/tree-cfg.c.jj 2013-09-05 09:19:03.000000000 +0200 > +++ gcc/tree-cfg.c 2013-09-12 15:19:40.103267115 +0200 > @@ -6777,10 +6777,10 @@ move_sese_region_to_fn (struct function > if (bb->loop_father->header == bb > && loop_outer (bb->loop_father) == loop) > { > - struct loop *loop = bb->loop_father; > + struct loop *this_loop = bb->loop_father; > flow_loop_tree_node_remove (bb->loop_father); > - flow_loop_tree_node_add (get_loop (dest_cfun, 0), loop); > - fixup_loop_arrays_after_move (saved_cfun, cfun, loop); > + flow_loop_tree_node_add (get_loop (dest_cfun, 0), this_loop); > + fixup_loop_arrays_after_move (saved_cfun, cfun, this_loop); > } > > /* Remove loop exits from the outlined region. */ > @@ -6835,6 +6835,23 @@ move_sese_region_to_fn (struct function > outer; outer = loop_outer (outer)) > outer->num_nodes -= bbs.length (); > > + if (saved_cfun->has_simduid_loops || saved_cfun->has_force_vect_loops) > + { > + struct loop *aloop; > + for (i = 0; vec_safe_iterate (loops->larray, i, &aloop); i++) > + if (aloop != NULL) > + { > + if (aloop->simduid) > + { > + replace_by_duplicate_decl (&aloop->simduid, d.vars_map, > + d.to_context); > + dest_cfun->has_simduid_loops = true; > + } > + if (aloop->force_vect) > + dest_cfun->has_force_vect_loops = true; > + } > + } > + > /* Rewire BLOCK_SUBBLOCKS of orig_block. */ > if (orig_block) > { > --- gcc/omp-low.c.jj 2013-09-11 18:19:39.000000000 +0200 > +++ gcc/omp-low.c 2013-09-12 13:55:34.081148580 +0200 > @@ -976,7 +976,7 @@ build_outer_var_ref (tree var, omp_conte > if (ctx->outer && is_taskreg_ctx (ctx)) > x = lookup_decl (var, ctx->outer); > else if (ctx->outer) > - x = maybe_lookup_decl (var, ctx->outer); > + x = maybe_lookup_decl_in_outer_ctx (var, ctx); > if (x == NULL_TREE) > x = var; > } > --- gcc/tree-inline.c.jj 2013-08-27 20:53:28.000000000 +0200 > +++ gcc/tree-inline.c 2013-09-12 16:05:58.348124256 +0200 > @@ -2254,14 +2254,14 @@ maybe_move_debug_stmts_to_successors (co > as siblings of DEST_PARENT. */ > > static void > -copy_loops (bitmap blocks_to_copy, > +copy_loops (copy_body_data *id, > struct loop *dest_parent, struct loop *src_parent) > { > struct loop *src_loop = src_parent->inner; > while (src_loop) > { > - if (!blocks_to_copy > - || bitmap_bit_p (blocks_to_copy, src_loop->header->index)) > + if (!id->blocks_to_copy > + || bitmap_bit_p (id->blocks_to_copy, src_loop->header->index)) > { > struct loop *dest_loop = alloc_loop (); > > @@ -2285,8 +2285,19 @@ copy_loops (bitmap blocks_to_copy, > place_new_loop (cfun, dest_loop); > flow_loop_tree_node_add (dest_parent, dest_loop); > > + if (src_loop->simduid) > + { > + dest_loop->simduid = remap_decl (src_loop->simduid, id); > + cfun->has_simduid_loops = true; > + } > + if (src_loop->force_vect) > + { > + dest_loop->force_vect = true; > + cfun->has_force_vect_loops = true; > + } > + > /* Recurse. */ > - copy_loops (blocks_to_copy, dest_loop, src_loop); > + copy_loops (id, dest_loop, src_loop); > } > src_loop = src_loop->next; > } > @@ -2315,7 +2326,7 @@ redirect_all_calls (copy_body_data * id, > static tree > copy_cfg_body (copy_body_data * id, gcov_type count, int frequency_scale, > basic_block entry_block_map, basic_block exit_block_map, > - bitmap blocks_to_copy, basic_block new_entry) > + basic_block new_entry) > { > tree callee_fndecl = id->src_fn; > /* Original cfun for the callee, doesn't change. */ > @@ -2379,7 +2390,7 @@ copy_cfg_body (copy_body_data * id, gcov > > /* Use aux pointers to map the original blocks to copy. */ > FOR_EACH_BB_FN (bb, cfun_to_copy) > - if (!blocks_to_copy || bitmap_bit_p (blocks_to_copy, bb->index)) > + if (!id->blocks_to_copy || bitmap_bit_p (id->blocks_to_copy, bb->index)) > { > basic_block new_bb = copy_bb (id, bb, frequency_scale, count_scale); > bb->aux = new_bb; > @@ -2393,8 +2404,8 @@ copy_cfg_body (copy_body_data * id, gcov > bool can_make_abormal_goto > = id->gimple_call && stmt_can_make_abnormal_goto (id->gimple_call); > FOR_ALL_BB_FN (bb, cfun_to_copy) > - if (!blocks_to_copy > - || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index))) > + if (!id->blocks_to_copy > + || (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index))) > need_debug_cleanup |= copy_edges_for_bb (bb, count_scale, > exit_block_map, > can_make_abormal_goto); > > @@ -2409,12 +2420,10 @@ copy_cfg_body (copy_body_data * id, gcov > if (loops_for_fn (src_cfun) != NULL > && current_loops != NULL) > { > - copy_loops (blocks_to_copy, entry_block_map->loop_father, > + copy_loops (id, entry_block_map->loop_father, > get_loop (src_cfun, 0)); > /* Defer to cfgcleanup to update loop-father fields of basic-blocks. > */ > loops_state_set (LOOPS_NEED_FIXUP); > - cfun->has_force_vect_loops |= src_cfun->has_force_vect_loops; > - cfun->has_simduid_loops |= src_cfun->has_simduid_loops; > } > > /* If the loop tree in the source function needed fixup, mark the > @@ -2424,8 +2433,8 @@ copy_cfg_body (copy_body_data * id, gcov > > if (gimple_in_ssa_p (cfun)) > FOR_ALL_BB_FN (bb, cfun_to_copy) > - if (!blocks_to_copy > - || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index))) > + if (!id->blocks_to_copy > + || (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index))) > copy_phis_for_bb (bb, id); > > FOR_ALL_BB_FN (bb, cfun_to_copy) > @@ -2597,7 +2606,7 @@ copy_tree_body (copy_body_data *id) > static tree > copy_body (copy_body_data *id, gcov_type count, int frequency_scale, > basic_block entry_block_map, basic_block exit_block_map, > - bitmap blocks_to_copy, basic_block new_entry) > + basic_block new_entry) > { > tree fndecl = id->src_fn; > tree body; > @@ -2605,7 +2614,7 @@ copy_body (copy_body_data *id, gcov_type > /* If this body has a CFG, walk CFG and copy. */ > gcc_assert (ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (fndecl))); > body = copy_cfg_body (id, count, frequency_scale, entry_block_map, > exit_block_map, > - blocks_to_copy, new_entry); > + new_entry); > copy_debug_stmts (id); > > return body; > @@ -4209,7 +4218,7 @@ expand_call_inline (basic_block bb, gimp > duplicate our body before altering anything. */ > copy_body (id, bb->count, > GCOV_COMPUTE_SCALE (cg_edge->frequency, CGRAPH_FREQ_BASE), > - bb, return_block, NULL, NULL); > + bb, return_block, NULL); > > /* Reset the escaped solution. */ > if (cfun->gimple_df) > @@ -5336,7 +5345,7 @@ tree_function_versioning (tree old_decl, > > /* Copy the Function's body. */ > copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE, > - ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, blocks_to_copy, new_entry); > + ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, new_entry); > > /* Renumber the lexical scoping (non-code) blocks consecutively. */ > number_blocks (new_decl); > --- libgomp/testsuite/libgomp.c/pr58392.c.jj 2013-09-12 16:09:10.493065328 > +0200 > +++ libgomp/testsuite/libgomp.c/pr58392.c 2013-09-12 16:08:57.000000000 > +0200 > @@ -0,0 +1,58 @@ > +/* PR tree-optimization/58392 */ > +/* { dg-do run } */ > +/* { dg-options "-O2" } */ > +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ > +/* { dg-additional-options "-mavx" { target avx_runtime } } */ > + > +extern void abort (void); > +int d[32 * 32]; > + > +__attribute__((noinline, noclone)) int > +foo (int a, int b) > +{ > + int j, c = 0; > + #pragma omp parallel for reduction(+: c) > + for (j = 0; j < a; j += 32) > + { > + int l; > + #pragma omp simd reduction(+: c) > + for (l = 0; l < b; ++l) > + c += d[j + l]; > + } > + return c; > +} > + > +__attribute__((noinline, noclone)) int > +bar (int a) > +{ > + int j, c = 0; > + #pragma omp parallel for simd reduction(+: c) > + for (j = 0; j < a; ++j) > + c += d[j]; > + return c; > +} > + > +__attribute__((noinline)) static int > +baz (int a) > +{ > + int j, c = 0; > + #pragma omp simd reduction(+: c) > + for (j = 0; j < a; ++j) > + c += d[j]; > + return c; > +} > + > +int > +main () > +{ > + int i; > + for (i = 0; i < 32 * 32; i++) > + d[i] = (i & 31); > + if (foo (32 * 32, 32) != (31 * 32 / 2) * 32) > + abort (); > + if (bar (32 * 32) != (31 * 32 / 2) * 32) > + abort (); > + if (baz (32 * 32) != (31 * 32 / 2) * 32) > + abort (); > + return 0; > +} > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend