In this PR we have a publication safety violation in a transaction: the
loop invariant motion pass hoists a load out of a loop, creating a load
data race. If you are unfamiliar with load data races in transactions,
please see the PR; they have been confusing to most people (me
included). In the snippet below, we are not allowed to hoist the load
of DATA out of the loop unless DATA is loaded on every path out of the
loop:
__transaction_atomic
{
  for (i = 0; i < 10; i++)
    if (x[i])
      x[i] += DATA;
  // It would be OK to hoist the load of DATA above the loop if DATA
  // were also loaded here:
  // blah = DATA;
}
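For those unfamiliar with the idiom, here is the publication pattern at
issue, reduced to a self-contained sketch (writer/reader and the values
are made up for illustration, not code from the PR):

int x[10];
int DATA;

void writer (void)
{
  DATA = 42;                          /* Plain, non-transactional store.  */
  __transaction_atomic { x[5] = 1; }  /* Publish DATA.  */
}

void reader (void)
{
  int i;
  __transaction_atomic
    {
      for (i = 0; i < 10; i++)
        if (x[i])
          x[i] += DATA;  /* DATA is read only after it is published.  */
    }
}

As written, reader() loads DATA only after seeing the published flag, so
the program is race free under publication safety. If LIM hoists the
load of DATA above the loop, reader() loads DATA even when x[] is all
zeros, i.e. concurrently with the writer's unsynchronized store; that is
a data race introduced by the compiler.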
rth gave me the evil eye on a previous incarnation of this patch, and
I'm sure this one is not totally devoid of grime.
The main problem is how to record all the loads in a given function
efficiently, so that we can tease the information back out in
every_path_out_of_loop_has_load(). In my first revision I had some data
flow equations that calculated the loads on the different paths, and
rth just about hit me. Instead, I now save all the loads in the
function and iterate through them in a brute-force way. I'd like to
rewrite this into a hash of some sort, but before I go any further I'd
like to know whether the main idea is OK.
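Concretely, the hash I have in mind would map each global decl to the
vector of its load sites, so that every_path_out_of_loop_has_load()
only has to walk the loads of the decl in question. Below is an
untested sketch against libiberty's htab; load_sites, load_site_map,
and record_load_site are placeholder names, not code in the tree:

/* Map a global DECL to all of its load sites in the function.  */
struct load_sites
{
  tree decl;                        /* Hash key: the global being loaded.  */
  VEC (mem_ref_loc_p, heap) *locs;  /* Every load of DECL.  */
};

static htab_t load_site_map;

static hashval_t
load_sites_hash (const void *p)
{
  return htab_hash_pointer (((const struct load_sites *) p)->decl);
}

static int
load_sites_eq (const void *p1, const void *p2)
{
  return ((const struct load_sites *) p1)->decl
         == ((const struct load_sites *) p2)->decl;
}

/* Record one load of *REF_P occurring in STMT.  */

static void
record_load_site (tree *ref_p, gimple stmt)
{
  struct load_sites tmp, *entry;
  mem_ref_loc_p aref;
  void **slot;

  tmp.decl = *ref_p;
  slot = htab_find_slot (load_site_map, &tmp, INSERT);
  if (!*slot)
    {
      entry = XNEW (struct load_sites);
      entry->decl = *ref_p;
      entry->locs = NULL;
      *slot = entry;
    }
  entry = (struct load_sites *) *slot;

  aref = XNEW (struct mem_ref_loc);
  aref->stmt = stmt;
  aref->ref = ref_p;
  VEC_safe_push (mem_ref_loc_p, heap, entry->locs, aref);
}

load_site_map would be created in tree_ssa_lim_tm_initialize() with
htab_create (64, load_sites_hash, load_sites_eq, free), and the lookup
side becomes a single htab_find() keyed on the decl, followed by the
same post-dominance walk over a much smaller vector.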
FYI, it has been suggested to use the mem_ref_p information already
available in the pass, but I need information on all loads, not just
those occurring inside loops.
BTW, this only fixes the loop invariant motion pass. I'm sure there are
other passes that will need equally painful fixes.
Aldy
* tree-ssa-loop-im.c (all_tm_blocks, all_loads): New globals.
(every_path_out_of_loop_has_load): New.
(movement_possibility): Restrict movement of transaction loads.
(tree_ssa_lim_initialize): Call tree_ssa_lim_tm_initialize.
(tree_ssa_lim_finalize): Call tree_ssa_lim_tm_finalize.
(tree_ssa_lim_tm_initialize): New.
(tree_ssa_lim_tm_finalize): New.
* tree.h (get_all_tm_blocks): Declare.
* trans-mem.c (tm_region_init): Use the heap to store BB
auxiliary data.
(get_all_tm_blocks): New.
Index: tree-ssa-loop-im.c
===================================================================
--- tree-ssa-loop-im.c (revision 184445)
+++ tree-ssa-loop-im.c (working copy)
@@ -150,7 +150,7 @@ typedef struct mem_ref
bitmap indep_ref; /* The set of memory references on that
this reference is independent. */
- bitmap dep_ref; /* The complement of DEP_REF. */
+ bitmap dep_ref; /* The complement of INDEP_REF. */
} *mem_ref_p;
DEF_VEC_P(mem_ref_p);
@@ -189,6 +189,13 @@ static struct
static bool ref_indep_loop_p (struct loop *, mem_ref_p);
+/* All basic blocks that are within a transaction in the current
+ function. */
+static bitmap all_tm_blocks;
+
+/* All the loads in the current function. */
+static mem_ref_locs_p all_loads;
+
/* Minimum cost of an expensive expression. */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
@@ -337,6 +344,26 @@ for_each_index (tree *addr_p, bool (*cbc
}
}
+/* Return true if every path out of the loop containing STMT loads the
+   decl loaded in STMT.  */
+
+static bool
+every_path_out_of_loop_has_load (gimple stmt)
+{
+  basic_block bb = gimple_bb (stmt);
+  basic_block header = bb->loop_father->header;
+  unsigned int i;
+  mem_ref_loc_p aref;
+  tree rhs = gimple_assign_rhs1 (stmt);
+
+  /* A load whose block post-dominates the loop header is executed on
+     every path out of the loop.  */
+  FOR_EACH_VEC_ELT (mem_ref_loc_p, all_loads->locs, i, aref)
+    if (rhs == *aref->ref
+        && dominated_by_p (CDI_POST_DOMINATORS, header,
+                           gimple_bb (aref->stmt)))
+      return true;
+  return false;
+}
+
/* If it is possible to hoist the statement STMT unconditionally,
returns MOVE_POSSIBLE.
If it is possible to hoist the statement STMT, but we must avoid making
@@ -412,6 +439,26 @@ movement_possibility (gimple stmt)
|| gimple_could_trap_p (stmt))
return MOVE_PRESERVE_EXECUTION;
+  /* Non-local loads in a transaction cannot be hoisted out unless the
+     load happens on every path out of the loop.  */
+  if (flag_tm
+      && bitmap_bit_p (all_tm_blocks, gimple_bb (stmt)->index)
+      && gimple_assign_single_p (stmt))
+    {
+      tree rhs = gimple_assign_rhs1 (stmt);
+      if (DECL_P (rhs) && is_global_var (rhs)
+          && !every_path_out_of_loop_has_load (stmt))
+        {
+          if (dump_file)
+            {
+              fprintf (dump_file, "Cannot hoist conditional load of ");
+              print_generic_expr (dump_file, rhs, TDF_SLIM);
+              fprintf (dump_file, " because it is in a transaction.\n");
+            }
+          return MOVE_IMPOSSIBLE;
+        }
+    }
+
return ret;
}
@@ -2358,6 +2405,42 @@ fill_always_executed_in (struct loop *lo
fill_always_executed_in (loop, contains_call);
}
+/* Compute global information needed for transactional restrictions in
+   the loop invariant motion pass.  */
+
+static void
+tree_ssa_lim_tm_initialize (void)
+{
+  basic_block bb;
+  gimple_stmt_iterator bsi;
+
+  calculate_dominance_info (CDI_POST_DOMINATORS);
+  all_tm_blocks = get_all_tm_blocks ();
+  all_loads = mem_ref_locs_alloc ();
+
+  /* Save all the loads in the current function.  */
+  /* ?? This needs to be rewritten into something more efficient, like
+     a hash.  */
+  FOR_EACH_BB (bb)
+    for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+      {
+        gimple stmt = gsi_stmt (bsi);
+
+        if (is_gimple_assign (stmt)
+            && gimple_assign_single_p (stmt))
+          {
+            tree *rhs_p = gimple_assign_rhs1_ptr (stmt);
+            if (DECL_P (*rhs_p) && is_global_var (*rhs_p))
+              {
+                mem_ref_loc_p aref = XNEW (struct mem_ref_loc);
+                aref->stmt = stmt;
+                aref->ref = rhs_p;
+                VEC_safe_push (mem_ref_loc_p, heap, all_loads->locs, aref);
+              }
+          }
+      }
+}
+
/* Compute the global information needed by the loop invariant motion pass. */
static void
@@ -2387,6 +2470,16 @@ tree_ssa_lim_initialize (void)
sbitmap_free (contains_call);
lim_aux_data_map = pointer_map_create ();
+
+  if (flag_tm)
+    tree_ssa_lim_tm_initialize ();
+}
+
+/* Clean up after the transactional memory bits of the loop invariant
+   motion pass.  */
+
+static void
+tree_ssa_lim_tm_finalize (void)
+{
+  BITMAP_FREE (all_tm_blocks);
+  free_mem_ref_locs (all_loads);
+}
/* Cleans up after the invariant motion pass. */
@@ -2420,6 +2513,9 @@ tree_ssa_lim_finalize (void)
if (memory_accesses.ttae_cache)
pointer_map_destroy (memory_accesses.ttae_cache);
+
+  if (flag_tm)
+    tree_ssa_lim_tm_finalize ();
}
/* Moves invariants from loops. Only "expensive" invariants are moved out --
Index: tree.h
===================================================================
--- tree.h (revision 184445)
+++ tree.h (working copy)
@@ -5875,6 +5875,7 @@ extern bool is_tm_may_cancel_outer (tree
extern bool is_tm_ending_fndecl (tree);
extern void record_tm_replacement (tree, tree);
extern void tm_malloc_replacement (tree);
+extern bitmap get_all_tm_blocks (void);
static inline bool
is_tm_safe_or_pure (const_tree x)
Index: testsuite/gcc.dg/tm/pub-safety-1.c
===================================================================
--- testsuite/gcc.dg/tm/pub-safety-1.c (revision 0)
+++ testsuite/gcc.dg/tm/pub-safety-1.c (revision 0)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -O1 -fdump-tree-lim1" } */
+
+/* Test that thread-visible loads do not get hoisted out of loops if
+   the load would not have occurred on each path out of the loop.  */
+
+int x[10] = {0,0,0,0,0,0,0,0,0,0};
+int DATA_DATA = 0;
+
+void reader()
+{
+  int i;
+  __transaction_atomic
+    {
+      for (i = 0; i < 10; i++)
+        if (x[i])
+          x[i] += DATA_DATA;
+      /* If we loaded DATA_DATA here, we could hoist it before the loop,
+         but since we don't... we can't.  */
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Cannot hoist.*DATA_DATA because it is in a transaction" 1 "lim1" } } */
+/* { dg-final { cleanup-tree-dump "lim1" } } */
Index: testsuite/gcc.dg/tm/pub-safety-2.c
===================================================================
--- testsuite/gcc.dg/tm/pub-safety-2.c (revision 0)
+++ testsuite/gcc.dg/tm/pub-safety-2.c (revision 0)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-fgnu-tm -O1 -fdump-tree-lim1" } */
+
+/* Test that thread-visible loads do not get hoisted out of loops if
+   the load would not have occurred on each path out of the loop.  */
+
+int x[10] = {0,0,0,0,0,0,0,0,0,0};
+int DATA_DATA = 0, george;
+
+void reader()
+{
+  int i;
+  __transaction_atomic
+    {
+      for (i = 0; i < 10; i++)
+        if (x[i])
+          x[i] += DATA_DATA;
+
+      /* Because of this load of DATA_DATA, we are loading DATA_DATA
+         on every path out of the loop.  It is OK to hoist DATA_DATA
+         out.  */
+      george = DATA_DATA;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Cannot hoist.*DATA_DATA because it is in a transaction" 0 "lim1" } } */
+/* { dg-final { cleanup-tree-dump "lim1" } } */
Index: trans-mem.c
===================================================================
--- trans-mem.c (revision 184445)
+++ trans-mem.c (working copy)
@@ -1858,18 +1858,25 @@ tm_region_init (struct tm_region *region
VEC(basic_block, heap) *queue = NULL;
bitmap visited_blocks = BITMAP_ALLOC (NULL);
struct tm_region *old_region;
+ struct tm_region **region_worklist;
all_tm_regions = region;
bb = single_succ (ENTRY_BLOCK_PTR);
+  /* We could store this information in bb->aux, but we may get called
+     through get_all_tm_blocks() from another pass that may already be
+     using bb->aux.  */
+  region_worklist
+    = (struct tm_region **) xcalloc (n_basic_blocks + NUM_FIXED_BLOCKS,
+                                     sizeof (struct tm_region *));
+
VEC_safe_push (basic_block, heap, queue, bb);
- gcc_assert (!bb->aux); /* FIXME: Remove me. */
- bb->aux = region;
+ region_worklist[bb->index] = region;
do
{
bb = VEC_pop (basic_block, queue);
- region = (struct tm_region *)bb->aux;
- bb->aux = NULL;
+ region = region_worklist[bb->index];
+ region_worklist[bb->index] = NULL;
/* Record exit and irrevocable blocks. */
region = tm_region_init_1 (region, bb);
@@ -1886,20 +1893,20 @@ tm_region_init (struct tm_region *region
{
bitmap_set_bit (visited_blocks, e->dest->index);
VEC_safe_push (basic_block, heap, queue, e->dest);
- gcc_assert (!e->dest->aux); /* FIXME: Remove me. */
/* If the current block started a new region, make sure that only
the entry block of the new region is associated with this region.
Other successors are still part of the old region. */
if (old_region != region && e->dest != region->entry_block)
- e->dest->aux = old_region;
+ region_worklist[e->dest->index] = old_region;
else
- e->dest->aux = region;
+ region_worklist[e->dest->index] = region;
}
}
while (!VEC_empty (basic_block, queue));
VEC_free (basic_block, heap, queue);
BITMAP_FREE (visited_blocks);
+ free (region_worklist);
}
/* The "gate" function for all transactional memory expansion and optimization
@@ -2424,6 +2431,39 @@ get_tm_region_blocks (basic_block entry_
return bbs;
}
+/* Return a bitmap with all the basic blocks that appear in
+   transactions throughout current_function_decl.  The bitmap must be
+   freed by the caller.
+
+   This bitmap is only usable by passes that don't adjust the CFG in
+   the middle of the pass.  */
+
+bitmap
+get_all_tm_blocks (void)
+{
+  struct tm_region *region;
+  VEC (basic_block, heap) *queue;
+  bitmap all_tm_blocks = BITMAP_ALLOC (NULL);
+
+  /* ?? Perhaps we need to abstract gate_tm_init further, because we
+     certainly don't need it to calculate CDI_DOMINATOR info.  */
+  gate_tm_init ();
+
+  for (region = all_tm_regions; region; region = region->next)
+    {
+      queue = get_tm_region_blocks (region->entry_block,
+                                    region->exit_blocks,
+                                    region->irr_blocks,
+                                    all_tm_blocks,
+                                    /*stop_at_irr_p=*/true);
+      VEC_free (basic_block, heap, queue);
+    }
+
+  if (all_tm_regions)
+    bitmap_obstack_release (&tm_obstack);
+  return all_tm_blocks;
+}
+
/* Entry point to the MARK phase of TM expansion. Here we replace
transactional memory statements with calls to builtins, and function
calls with their transactional clones (if available). But we don't