On Thu, Nov 3, 2011 at 8:32 PM, Aldy Hernandez <al...@redhat.com> wrote: > This is everything else that doesn't fit neatly into any other category. > Here are the middle end changes, as well as pass ordering code, along with > varasm and a potpourri of other small changes. > > This is the last patch. Please let me know if there is anything else > (reasonable) you would like me to post. > > Index: gcc/cgraph.h > =================================================================== > --- gcc/cgraph.h (.../trunk) (revision 180744) > +++ gcc/cgraph.h (.../branches/transactional-memory) (revision > 180773) > @@ -98,6 +98,9 @@ struct GTY(()) cgraph_local_info { > /* True when the function has been originally extern inline, but it is > redefined now. */ > unsigned redefined_extern_inline : 1; > + > + /* True if the function may enter serial irrevocable mode. */ > + unsigned tm_may_enter_irr : 1; > }; > > /* Information about the function that needs to be computed globally > @@ -565,6 +568,8 @@ void verify_cgraph_node (struct cgraph_n > void cgraph_build_static_cdtor (char which, tree body, int priority); > void cgraph_reset_static_var_maps (void); > void init_cgraph (void); > +struct cgraph_node * cgraph_copy_node_for_versioning (struct cgraph_node *, > + tree, VEC(cgraph_edge_p,heap)*, bitmap); > struct cgraph_node *cgraph_function_versioning (struct cgraph_node *, > VEC(cgraph_edge_p,heap)*, > VEC(ipa_replace_map_p,gc)*, > Index: gcc/tree-pass.h > =================================================================== > --- gcc/tree-pass.h (.../trunk) (revision 180744) > +++ gcc/tree-pass.h (.../branches/transactional-memory) (revision > 180773) > @@ -447,6 +447,12 @@ extern struct gimple_opt_pass pass_build > extern struct gimple_opt_pass pass_local_pure_const; > extern struct gimple_opt_pass pass_tracer; > extern struct gimple_opt_pass pass_warn_unused_result; > +extern struct gimple_opt_pass pass_diagnose_tm_blocks; > +extern struct gimple_opt_pass pass_lower_tm; > +extern struct gimple_opt_pass pass_tm_init; > +extern struct gimple_opt_pass pass_tm_mark; > +extern struct gimple_opt_pass pass_tm_memopt; > +extern struct gimple_opt_pass pass_tm_edges; > extern struct gimple_opt_pass pass_split_functions; > extern struct gimple_opt_pass pass_feedback_split_functions; > > @@ -469,6 +475,7 @@ extern struct ipa_opt_pass_d pass_ipa_pu > extern struct simple_ipa_opt_pass pass_ipa_pta; > extern struct ipa_opt_pass_d pass_ipa_lto_wpa_fixup; > extern struct ipa_opt_pass_d pass_ipa_lto_finish_out; > +extern struct simple_ipa_opt_pass pass_ipa_tm; > extern struct ipa_opt_pass_d pass_ipa_profile; > extern struct ipa_opt_pass_d pass_ipa_cdtor_merge; > > Index: gcc/rtlanal.c > =================================================================== > --- gcc/rtlanal.c (.../trunk) (revision 180744) > +++ gcc/rtlanal.c (.../branches/transactional-memory) (revision > 180773) > @@ -1918,6 +1918,7 @@ alloc_reg_note (enum reg_note kind, rtx > case REG_CC_USER: > case REG_LABEL_TARGET: > case REG_LABEL_OPERAND: > + case REG_TM: > /* These types of register notes use an INSN_LIST rather than an > EXPR_LIST, so that copying is done right and dumps look > better. 
*/ > Index: gcc/omp-low.c > =================================================================== > --- gcc/omp-low.c (.../trunk) (revision 180744) > +++ gcc/omp-low.c (.../branches/transactional-memory) (revision > 180773) > @@ -139,6 +139,7 @@ static tree scan_omp_1_op (tree *, int * > case GIMPLE_TRY: \ > case GIMPLE_CATCH: \ > case GIMPLE_EH_FILTER: \ > + case GIMPLE_TRANSACTION: \ > /* The sub-statements for these should be walked. */ \ > *handled_ops_p = false; \ > break; > Index: gcc/toplev.c > =================================================================== > --- gcc/toplev.c (.../trunk) (revision 180744) > +++ gcc/toplev.c (.../branches/transactional-memory) (revision > 180773) > @@ -599,6 +599,7 @@ compile_file (void) > > output_shared_constant_pool (); > output_object_blocks (); > + finish_tm_clone_pairs (); > /* Write out any pending weak symbol declarations. */ > weak_finish (); > Index: gcc/cgraphunit.c > =================================================================== > --- gcc/cgraphunit.c (.../trunk) (revision 180744) > +++ gcc/cgraphunit.c (.../branches/transactional-memory) (revision > 180773) > @@ -2272,7 +2272,7 @@ update_call_expr (struct cgraph_node *ne > was copied to prevent duplications of calls that are dead > in the clone. */ > > -static struct cgraph_node * > +struct cgraph_node * > cgraph_copy_node_for_versioning (struct cgraph_node *old_version, > tree new_decl, > VEC(cgraph_edge_p,heap) *redirect_callers, > @@ -2286,7 +2286,7 @@ cgraph_copy_node_for_versioning (struct > > new_version = cgraph_create_node (new_decl); > > - new_version->analyzed = true; > + new_version->analyzed = old_version->analyzed;
Hm? analyzed means "with body", sure, you have a body if you clone. > new_version->local = old_version->local; > new_version->local.externally_visible = false; > new_version->local.local = true; > @@ -2294,6 +2294,7 @@ cgraph_copy_node_for_versioning (struct > new_version->rtl = old_version->rtl; > new_version->reachable = true; > new_version->count = old_version->count; > + new_version->lowered = true; OTOH this isn't necessarily true. cgraph exists before lowering. > for (e = old_version->callees; e; e=e->next_callee) > if (!bbs_to_copy > @@ -2389,7 +2390,6 @@ cgraph_function_versioning (struct cgrap > DECL_VIRTUAL_P (new_version_node->decl) = 0; > new_version_node->local.externally_visible = 0; > new_version_node->local.local = 1; > - new_version_node->lowered = true; > > /* Update the call_expr on the edges to call the new version node. */ > update_call_expr (new_version_node); > Index: gcc/tree-ssa-alias.c > =================================================================== > --- gcc/tree-ssa-alias.c (.../trunk) (revision 180744) > +++ gcc/tree-ssa-alias.c (.../branches/transactional-memory) > (revision 180773) > @@ -1182,6 +1182,8 @@ ref_maybe_used_by_call_p_1 (gimple call, > case BUILT_IN_MEMPCPY: > case BUILT_IN_STPCPY: > case BUILT_IN_STPNCPY: > + case BUILT_IN_TM_MEMCPY: > + case BUILT_IN_TM_MEMMOVE: > { > ao_ref dref; > tree size = NULL_TREE; > @@ -1228,6 +1230,32 @@ ref_maybe_used_by_call_p_1 (gimple call, > size); > return refs_may_alias_p_1 (&dref, ref, false); > } > + > + /* The following functions read memory pointed to by their > + first argument. */ > + CASE_BUILT_IN_TM_LOAD (1): > + CASE_BUILT_IN_TM_LOAD (2): > + CASE_BUILT_IN_TM_LOAD (4): > + CASE_BUILT_IN_TM_LOAD (8): > + CASE_BUILT_IN_TM_LOAD (FLOAT): > + CASE_BUILT_IN_TM_LOAD (DOUBLE): > + CASE_BUILT_IN_TM_LOAD (LDOUBLE): > + CASE_BUILT_IN_TM_LOAD (M64): > + CASE_BUILT_IN_TM_LOAD (M128): > + CASE_BUILT_IN_TM_LOAD (M256): > + case BUILT_IN_TM_LOG: > + case BUILT_IN_TM_LOG_1: > + case BUILT_IN_TM_LOG_2: > + case BUILT_IN_TM_LOG_4: > + case BUILT_IN_TM_LOG_8: > + case BUILT_IN_TM_LOG_FLOAT: > + case BUILT_IN_TM_LOG_DOUBLE: > + case BUILT_IN_TM_LOG_LDOUBLE: > + case BUILT_IN_TM_LOG_M64: > + case BUILT_IN_TM_LOG_M128: > + case BUILT_IN_TM_LOG_M256: > + return ptr_deref_may_alias_ref_p_1 (gimple_call_arg (call, 0), > ref); > + > /* These read memory pointed to by the first argument. 
*/ > case BUILT_IN_STRDUP: > case BUILT_IN_STRNDUP: > @@ -1250,6 +1278,7 @@ ref_maybe_used_by_call_p_1 (gimple call, > case BUILT_IN_STACK_SAVE: > case BUILT_IN_STACK_RESTORE: > case BUILT_IN_MEMSET: > + case BUILT_IN_TM_MEMSET: > case BUILT_IN_MEMSET_CHK: > case BUILT_IN_FREXP: > case BUILT_IN_FREXPF: > @@ -1480,6 +1509,19 @@ call_may_clobber_ref_p_1 (gimple call, a > case BUILT_IN_STRCAT: > case BUILT_IN_STRNCAT: > case BUILT_IN_MEMSET: > + case BUILT_IN_TM_MEMSET: > + CASE_BUILT_IN_TM_STORE (1): > + CASE_BUILT_IN_TM_STORE (2): > + CASE_BUILT_IN_TM_STORE (4): > + CASE_BUILT_IN_TM_STORE (8): > + CASE_BUILT_IN_TM_STORE (FLOAT): > + CASE_BUILT_IN_TM_STORE (DOUBLE): > + CASE_BUILT_IN_TM_STORE (LDOUBLE): > + CASE_BUILT_IN_TM_STORE (M64): > + CASE_BUILT_IN_TM_STORE (M128): > + CASE_BUILT_IN_TM_STORE (M256): > + case BUILT_IN_TM_MEMCPY: > + case BUILT_IN_TM_MEMMOVE: > { > ao_ref dref; > tree size = NULL_TREE; > Index: gcc/ipa-inline.c > =================================================================== > --- gcc/ipa-inline.c (.../trunk) (revision 180744) > +++ gcc/ipa-inline.c (.../branches/transactional-memory) (revision > 180773) > @@ -284,6 +284,15 @@ can_inline_edge_p (struct cgraph_edge *e > e->inline_failed = CIF_EH_PERSONALITY; > inlinable = false; > } > + /* TM pure functions should not get inlined if the outer function is > + a TM safe function. */ > + else if (flag_tm Please move flag checks into the respective predicates. Any reason why the is_tm_pure () predicate wouldn't already do the correct thing with !flag_tm? > + && is_tm_pure (callee->decl) > + && is_tm_safe (e->caller->decl)) > + { > + e->inline_failed = CIF_UNSPECIFIED; > + inlinable = false; > + } > /* Don't inline if the callee can throw non-call exceptions but the > caller cannot. > FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is > missing. > Index: gcc/crtstuff.c > =================================================================== > --- gcc/crtstuff.c (.../trunk) (revision 180744) > +++ gcc/crtstuff.c (.../branches/transactional-memory) (revision > 180773) > @@ -162,6 +162,9 @@ extern void __do_global_ctors_1 (void); > /* Likewise for _Jv_RegisterClasses. */ > extern void _Jv_RegisterClasses (void *) TARGET_ATTRIBUTE_WEAK; > > +extern void _ITM_registerTMCloneTable (void *, size_t) > TARGET_ATTRIBUTE_WEAK; > +extern void _ITM_deregisterTMCloneTable (void *) TARGET_ATTRIBUTE_WEAK; > + > #ifdef OBJECT_FORMAT_ELF > > /* Declare a pointer to void function type. 
*/ > @@ -241,6 +244,11 @@ STATIC void *__JCR_LIST__[] > = { }; > #endif /* JCR_SECTION_NAME */ > > +STATIC func_ptr __TMC_LIST__[] > + __attribute__((unused, section(".tm_clone_table"), > aligned(sizeof(void*)))) > + = { }; > +extern func_ptr __TMC_END__[] __attribute__((__visibility__ ("hidden"))); > + > #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP) > > #ifdef OBJECT_FORMAT_ELF > @@ -330,6 +338,13 @@ __do_global_dtors_aux (void) > } > #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */ > > + if (_ITM_deregisterTMCloneTable) > + { > + size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2; > + if (size > 0) > + _ITM_deregisterTMCloneTable (__TMC_LIST__); > + } > + > #ifdef USE_EH_FRAME_REGISTRY > #ifdef CRT_GET_RFIB_DATA > /* If we used the new __register_frame_info_bases interface, > @@ -391,6 +406,12 @@ frame_dummy (void) > register_classes (__JCR_LIST__); > } > #endif /* JCR_SECTION_NAME */ > + if (_ITM_registerTMCloneTable) > + { > + size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2; > + if (size > 0) > + _ITM_registerTMCloneTable (__TMC_LIST__, size); > + } > } > > #ifdef INIT_SECTION_ASM_OP > @@ -457,6 +478,13 @@ __do_global_dtors (void) > for (p = __DTOR_LIST__ + 1; (f = *p); p++) > f (); > > + if (_ITM_deregisterTMCloneTable) > + { > + size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2; > + if (size > 0) > + _ITM_deregisterTMCloneTable (__TMC_LIST__); > + } > + > #ifdef USE_EH_FRAME_REGISTRY > if (__deregister_frame_info) > __deregister_frame_info (__EH_FRAME_BEGIN__); > @@ -570,6 +598,11 @@ STATIC void *__JCR_END__[1] > = { 0 }; > #endif /* JCR_SECTION_NAME */ > > +func_ptr __TMC_END__[] > + __attribute__((unused, section(".tm_clone_table"), aligned(sizeof(void > *)), > + __visibility__ ("hidden"))) > + = { }; > + > #ifdef INIT_ARRAY_SECTION_ASM_OP > > /* If we are using .init_array, there is nothing to do. */ > Index: gcc/cfgbuild.c > =================================================================== > --- gcc/cfgbuild.c (.../trunk) (revision 180744) > +++ gcc/cfgbuild.c (.../branches/transactional-memory) (revision > 180773) > @@ -338,18 +338,30 @@ make_edges (basic_block min, basic_block > /* Add any appropriate EH edges. */ > rtl_make_eh_edge (edge_cache, bb, insn); > > - if (code == CALL_INSN && nonlocal_goto_handler_labels) > + if (code == CALL_INSN) > { > - /* ??? This could be made smarter: in some cases it's possible > - to tell that certain calls will not do a nonlocal goto. > - For example, if the nested functions that do the nonlocal > - gotos do not have their addresses taken, then only calls to > - those functions or to other nested functions that use them > - could possibly do nonlocal gotos. */ > if (can_nonlocal_goto (insn)) > - for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1)) > - make_label_edge (edge_cache, bb, XEXP (x, 0), > - EDGE_ABNORMAL | EDGE_ABNORMAL_CALL); > + { > + /* ??? This could be made smarter: in some cases it's > + possible to tell that certain calls will not do a > + nonlocal goto. For example, if the nested functions > + that do the nonlocal gotos do not have their addresses > + taken, then only calls to those functions or to other > + nested functions that use them could possibly do > + nonlocal gotos. 
*/ > + for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1)) > + make_label_edge (edge_cache, bb, XEXP (x, 0), > + EDGE_ABNORMAL | EDGE_ABNORMAL_CALL); > + } > + > + if (flag_tm) > + { > + rtx note; > + for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) > + if (REG_NOTE_KIND (note) == REG_TM) > + make_label_edge (edge_cache, bb, XEXP (note, 0), > + EDGE_ABNORMAL | EDGE_ABNORMAL_CALL); > + } > } > } > > Index: gcc/timevar.def > =================================================================== > --- gcc/timevar.def (.../trunk) (revision 180744) > +++ gcc/timevar.def (.../branches/transactional-memory) (revision > 180773) > @@ -184,6 +184,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME , " > DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") > DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") > DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch initialization > conversion") > +DEFTIMEVAR (TV_TRANS_MEM , "transactional memory") > DEFTIMEVAR (TV_TREE_STRLEN , "tree strlen optimization") > DEFTIMEVAR (TV_CGRAPH_VERIFY , "callgraph verifier") > DEFTIMEVAR (TV_DOM_FRONTIERS , "dominance frontiers") > Index: gcc/recog.c > =================================================================== > --- gcc/recog.c (.../trunk) (revision 180744) > +++ gcc/recog.c (.../branches/transactional-memory) (revision 180773) > @@ -3287,6 +3287,7 @@ peep2_attempt (basic_block bb, rtx insn, > { > case REG_NORETURN: > case REG_SETJMP: > + case REG_TM: > add_reg_note (new_insn, REG_NOTE_KIND (note), > XEXP (note, 0)); > break; > Index: gcc/function.h > =================================================================== > --- gcc/function.h (.../trunk) (revision 180744) > +++ gcc/function.h (.../branches/transactional-memory) (revision > 180773) > @@ -467,6 +467,14 @@ extern GTY(()) struct rtl_data x_rtl; > want to do differently. */ > #define crtl (&x_rtl) > > +/* This structure is used to map a gimple statement to a label, > + or list of labels to represent transaction restart. */ > + > +struct GTY(()) tm_restart_node { > + gimple stmt; > + tree label_or_list; > +}; > + > struct GTY(()) stack_usage > { > /* # of bytes of static stack space allocated by the function. */ > @@ -518,6 +526,10 @@ struct GTY(()) function { > /* Value histograms attached to particular statements. */ > htab_t GTY((skip)) value_histograms; > > + /* Map gimple stmt to tree label (or list of labels) for transaction > + restart and abort. */ > + htab_t GTY ((param_is (struct tm_restart_node))) tm_restart; > + As this maps 'gimple' to tree, shouldn't this go into fn->gimple_df instead? That way you avoid growing the generic struct function. Or into eh_status, if that looks like a better fit. > /* For function.c. */ > > /* Points to the FUNCTION_DECL of this function. 
*/ > Index: gcc/emit-rtl.c > =================================================================== > --- gcc/emit-rtl.c (.../trunk) (revision 180744) > +++ gcc/emit-rtl.c (.../branches/transactional-memory) (revision > 180773) > @@ -3595,6 +3595,7 @@ try_split (rtx pat, rtx trial, int last) > > case REG_NORETURN: > case REG_SETJMP: > + case REG_TM: > for (insn = insn_last; insn != NULL_RTX; insn = PREV_INSN (insn)) > { > if (CALL_P (insn)) > Index: gcc/cfgexpand.c > =================================================================== > --- gcc/cfgexpand.c (.../trunk) (revision 180744) > +++ gcc/cfgexpand.c (.../branches/transactional-memory) (revision > 180773) > @@ -2096,6 +2096,32 @@ expand_gimple_stmt (gimple stmt) > } > } > > + /* Mark all calls that can have a transaction restart. */ Why isn't this done when we expand the call? This walking of the RTL sequence looks like a hack (albeit an easy one). > + if (cfun->tm_restart && is_gimple_call (stmt)) > + { > + struct tm_restart_node dummy; > + void **slot; > + > + dummy.stmt = stmt; > + slot = htab_find_slot (cfun->tm_restart, &dummy, NO_INSERT); > + if (slot) > + { > + struct tm_restart_node *n = (struct tm_restart_node *) *slot; > + tree list = n->label_or_list; > + rtx insn; > + > + for (insn = next_real_insn (last); !CALL_P (insn); > + insn = next_real_insn (insn)) > + continue; > + > + if (TREE_CODE (list) == LABEL_DECL) > + add_reg_note (insn, REG_TM, label_rtx (list)); > + else > + for (; list ; list = TREE_CHAIN (list)) > + add_reg_note (insn, REG_TM, label_rtx (TREE_VALUE (list))); > + } > + } > + > return last; > } > > @@ -4455,6 +4481,10 @@ gimple_expand_cfg (void) > /* After expanding, the return labels are no longer needed. */ > return_label = NULL; > naked_return_label = NULL; > + > + /* After expanding, the tm_restart map is no longer needed. */ > + cfun->tm_restart = NULL; You should still free it, to not confuse the statistics code, I think. > + > /* Tag the blocks with a depth number so that change_scope can find > the common parent easily. */ > set_block_levels (DECL_INITIAL (cfun->decl), 0); > Index: gcc/varasm.c > =================================================================== > --- gcc/varasm.c (.../trunk) (revision 180744) > +++ gcc/varasm.c (.../branches/transactional-memory) (revision > 180773) > @@ -5859,6 +5859,103 @@ assemble_alias (tree decl, tree target) > } > } > > +/* Record and output a table of translations from original function > + to its transaction aware clone. Note that tm_pure functions are > + considered to be their own clone. */ > + > +static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map))) > + htab_t tm_clone_pairs; > + > +void > +record_tm_clone_pair (tree o, tree n) > +{ > + struct tree_map **slot, *h; > + > + if (tm_clone_pairs == NULL) > + tm_clone_pairs = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0); > + > + h = ggc_alloc_tree_map (); > + h->hash = htab_hash_pointer (o); > + h->base.from = o; > + h->to = n; > + > + slot = (struct tree_map **) > + htab_find_slot_with_hash (tm_clone_pairs, h, h->hash, INSERT); > + *slot = h; > +} > + > +tree > +get_tm_clone_pair (tree o) > +{ > + if (tm_clone_pairs) > + { > + struct tree_map *h, in; > + > + in.base.from = o; > + in.hash = htab_hash_pointer (o); > + h = (struct tree_map *) htab_find_with_hash (tm_clone_pairs, > + &in, in.hash); > + if (h) > + return h->to; > + } > + return NULL_TREE; > +} > + > +/* Helper function for finish_tm_clone_pairs. Dump the clone table. 
*/ > + > +int > +finish_tm_clone_pairs_1 (void **slot, void *info ATTRIBUTE_UNUSED) > +{ > + struct tree_map *map = (struct tree_map *) *slot; > + bool *switched = (bool *) info; > + tree src = map->base.from; > + tree dst = map->to; > + struct cgraph_node *src_n = cgraph_get_node (src); > + struct cgraph_node *dst_n = cgraph_get_node (dst); > + > + /* The function ipa_tm_create_version() marks the clone as needed if > + the original function was needed. But we also mark the clone as > + needed if we ever called the clone indirectly through > + TM_GETTMCLONE. If neither of these are true, we didn't generate > + a clone, and we didn't call it indirectly... no sense keeping it > + in the clone table. */ > + if (!dst_n || !dst_n->needed) > + return 1; > + > + /* This covers the case where we have optimized the original > + function away, and only access the transactional clone. */ > + if (!src_n || !src_n->needed) > + return 1; > + > + if (!*switched) > + { > + switch_to_section (get_named_section (NULL, ".tm_clone_table", 3)); > + assemble_align (POINTER_SIZE); > + *switched = true; > + } > + > + assemble_integer (XEXP (DECL_RTL (src), 0), > + POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); > + assemble_integer (XEXP (DECL_RTL (dst), 0), > + POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); > + return 1; > +} > + > +void > +finish_tm_clone_pairs (void) > +{ > + bool switched = false; > + > + if (tm_clone_pairs == NULL) > + return; > + > + htab_traverse_noresize (tm_clone_pairs, finish_tm_clone_pairs_1, > + (void *) &switched); This makes the generated table dependent on memory layout. You need to walk the pairs in some deterministic order. In fact, why not walk all cgraph_nodes looking for the pairs - they should still be in the list of clones for a node, and you've marked it with DECL_TM_CLONE. You can then sort them by cgraph node uid; a rough sketch of what I mean is at the end of this message. Did you check bootstrapping GCC with TM enabled and address-space randomization turned on? > + htab_delete (tm_clone_pairs); > + tm_clone_pairs = NULL; > +} > + > + > /* Emit an assembler directive to set symbol for DECL visibility to > the visibility type VIS, which must not be VISIBILITY_DEFAULT. */ > > Index: gcc/output.h > =================================================================== > --- gcc/output.h (.../trunk) (revision 180744) > +++ gcc/output.h (.../branches/transactional-memory) (revision > 180773) > @@ -606,6 +606,11 @@ extern bool unlikely_text_section_p (sec > extern void switch_to_section (section *); > extern void output_section_asm_op (const void *); > > +extern void record_tm_clone_pair (tree, tree); > +extern void finish_tm_clone_pairs (void); > +extern int finish_tm_clone_pairs_1 (void **, void *); > +extern tree get_tm_clone_pair (tree); > + > extern void default_asm_output_source_filename (FILE *, const char *); > extern void output_file_directive (FILE *, const char *); > > Index: gcc/combine.c > =================================================================== > --- gcc/combine.c (.../trunk) (revision 180744) > +++ gcc/combine.c (.../branches/transactional-memory) (revision > 180773) > @@ -13286,6 +13286,7 @@ distribute_notes (rtx notes, rtx from_in > > case REG_NORETURN: > case REG_SETJMP: > + case REG_TM: > /* These notes must remain with the call. It should not be > possible for both I2 and I3 to be a call. 
*/ > if (CALL_P (i3)) > Index: gcc/tree-flow.h > =================================================================== > --- gcc/tree-flow.h (.../trunk) (revision 180744) > +++ gcc/tree-flow.h (.../branches/transactional-memory) (revision > 180773) > @@ -778,6 +778,9 @@ extern bool maybe_duplicate_eh_stmt (gim > extern bool verify_eh_edges (gimple); > extern bool verify_eh_dispatch_edge (gimple); > > +/* In gtm-low.c */ > +extern bool is_transactional_stmt (const_gimple); > + gimple.h please. looks like a gimple predicate as well, so the implementation should be in gimple.c? > /* In tree-ssa-pre.c */ > struct pre_expr_d; > void add_to_value (unsigned int, struct pre_expr_d *); > Index: gcc/tree-ssa-structalias.c > =================================================================== > --- gcc/tree-ssa-structalias.c (.../trunk) (revision 180744) > +++ gcc/tree-ssa-structalias.c (.../branches/transactional-memory) > (revision 180773) > @@ -4024,6 +4024,8 @@ find_func_aliases_for_builtin_call (gimp > case BUILT_IN_STPCPY_CHK: > case BUILT_IN_STRCAT_CHK: > case BUILT_IN_STRNCAT_CHK: > + case BUILT_IN_TM_MEMCPY: > + case BUILT_IN_TM_MEMMOVE: > { > tree res = gimple_call_lhs (t); > tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl) > @@ -4056,6 +4058,7 @@ find_func_aliases_for_builtin_call (gimp > } > case BUILT_IN_MEMSET: > case BUILT_IN_MEMSET_CHK: > + case BUILT_IN_TM_MEMSET: > { > tree res = gimple_call_lhs (t); > tree dest = gimple_call_arg (t, 0); > @@ -4197,6 +4200,50 @@ find_func_aliases_for_builtin_call (gimp > } > return true; > } > + CASE_BUILT_IN_TM_STORE (1): > + CASE_BUILT_IN_TM_STORE (2): > + CASE_BUILT_IN_TM_STORE (4): > + CASE_BUILT_IN_TM_STORE (8): > + CASE_BUILT_IN_TM_STORE (FLOAT): > + CASE_BUILT_IN_TM_STORE (DOUBLE): > + CASE_BUILT_IN_TM_STORE (LDOUBLE): > + CASE_BUILT_IN_TM_STORE (M64): > + CASE_BUILT_IN_TM_STORE (M128): > + CASE_BUILT_IN_TM_STORE (M256): > + { > + tree addr = gimple_call_arg (t, 0); > + tree src = gimple_call_arg (t, 1); > + > + get_constraint_for (addr, &lhsc); > + do_deref (&lhsc); > + get_constraint_for (src, &rhsc); > + process_all_all_constraints (lhsc, rhsc); > + VEC_free (ce_s, heap, lhsc); > + VEC_free (ce_s, heap, rhsc); > + return true; > + } > + CASE_BUILT_IN_TM_LOAD (1): > + CASE_BUILT_IN_TM_LOAD (2): > + CASE_BUILT_IN_TM_LOAD (4): > + CASE_BUILT_IN_TM_LOAD (8): > + CASE_BUILT_IN_TM_LOAD (FLOAT): > + CASE_BUILT_IN_TM_LOAD (DOUBLE): > + CASE_BUILT_IN_TM_LOAD (LDOUBLE): > + CASE_BUILT_IN_TM_LOAD (M64): > + CASE_BUILT_IN_TM_LOAD (M128): > + CASE_BUILT_IN_TM_LOAD (M256): > + { > + tree dest = gimple_call_lhs (t); > + tree addr = gimple_call_arg (t, 0); > + > + get_constraint_for (dest, &lhsc); > + get_constraint_for (addr, &rhsc); > + do_deref (&rhsc); > + process_all_all_constraints (lhsc, rhsc); > + VEC_free (ce_s, heap, lhsc); > + VEC_free (ce_s, heap, rhsc); > + return true; > + } > /* Variadic argument handling needs to be handled in IPA > mode as well. 
*/ > case BUILT_IN_VA_START: > Index: gcc/tree-cfg.c > =================================================================== > --- gcc/tree-cfg.c (.../trunk) (revision 180744) > +++ gcc/tree-cfg.c (.../branches/transactional-memory) (revision > 180773) > @@ -666,6 +666,15 @@ make_edges (void) > } > break; > > + case GIMPLE_TRANSACTION: > + { > + tree abort_label = gimple_transaction_label (last); > + if (abort_label) > + make_edge (bb, label_to_block (abort_label), 0); > + fallthru = true; > + } > + break; > + > default: > gcc_assert (!stmt_ends_bb_p (last)); > fallthru = true; > @@ -1196,22 +1205,30 @@ cleanup_dead_labels (void) > FOR_EACH_BB (bb) > { > gimple stmt = last_stmt (bb); > + tree label, new_label; > + > if (!stmt) > continue; > > switch (gimple_code (stmt)) > { > case GIMPLE_COND: > - { > - tree true_label = gimple_cond_true_label (stmt); > - tree false_label = gimple_cond_false_label (stmt); > + label = gimple_cond_true_label (stmt); > + if (label) > + { > + new_label = main_block_label (label); > + if (new_label != label) > + gimple_cond_set_true_label (stmt, new_label); > + } > > - if (true_label) > - gimple_cond_set_true_label (stmt, main_block_label > (true_label)); > - if (false_label) > - gimple_cond_set_false_label (stmt, main_block_label > (false_label)); > - break; > - } > + label = gimple_cond_false_label (stmt); > + if (label) > + { > + new_label = main_block_label (label); > + if (new_label != label) > + gimple_cond_set_false_label (stmt, new_label); > + } > + break; > > case GIMPLE_SWITCH: > { > @@ -1221,8 +1238,10 @@ cleanup_dead_labels (void) > for (i = 0; i < n; ++i) > { > tree case_label = gimple_switch_label (stmt, i); > - tree label = main_block_label (CASE_LABEL (case_label)); > - CASE_LABEL (case_label) = label; > + label = CASE_LABEL (case_label); > + new_label = main_block_label (label); > + if (new_label != label) > + CASE_LABEL (case_label) = new_label; > } > break; > } > @@ -1243,13 +1262,27 @@ cleanup_dead_labels (void) > /* We have to handle gotos until they're removed, and we don't > remove them until after we've created the CFG edges. */ > case GIMPLE_GOTO: > - if (!computed_goto_p (stmt)) > + if (!computed_goto_p (stmt)) > { > - tree new_dest = main_block_label (gimple_goto_dest (stmt)); > - gimple_goto_set_dest (stmt, new_dest); > + label = gimple_goto_dest (stmt); > + new_label = main_block_label (label); > + if (new_label != label) > + gimple_goto_set_dest (stmt, new_label); What's the reason for these changes? Optimization? > } > break; > > + case GIMPLE_TRANSACTION: > + { > + tree label = gimple_transaction_label (stmt); > + if (label) > + { > + tree new_label = main_block_label (label); > + if (new_label != label) > + gimple_transaction_set_label (stmt, new_label); > + } > + } > + break; > + > default: > break; > } > @@ -2263,6 +2296,13 @@ is_ctrl_altering_stmt (gimple t) > if (flags & ECF_NORETURN) > return true; > > + /* TM ending statements have backedges out of the transaction. > + Return true so we split the basic block containing > + them. */ > + if ((flags & ECF_TM_OPS) > + && is_tm_ending_fndecl (gimple_call_fndecl (t))) > + return true; > + > /* BUILT_IN_RETURN call is same as return statement. */ > if (gimple_call_builtin_p (t, BUILT_IN_RETURN)) > return true; > @@ -2284,6 +2324,10 @@ is_ctrl_altering_stmt (gimple t) > /* OpenMP directives alter control flow. */ > return true; > > + case GIMPLE_TRANSACTION: > + /* A transaction start alters control flow. 
*/ > + return true; > + > default: > break; > } > @@ -4054,6 +4098,17 @@ verify_gimple_switch (gimple stmt) > return false; > } > > +/* Verify the contents of a GIMPLE_TRANSACTION. Returns true if there > + is a problem, otherwise false. */ > + > +static bool > +verify_gimple_transaction (gimple stmt) > +{ > + tree lab = gimple_transaction_label (stmt); > + if (lab != NULL && TREE_CODE (lab) != LABEL_DECL) > + return true; ISTR this has substatements, so you should handle this in verify_gimple_in_seq_2 and make sure to verify those substatements. > + return false; > +} > > /* Verify a gimple debug statement STMT. > Returns true if anything is wrong. */ > @@ -4155,6 +4210,9 @@ verify_gimple_stmt (gimple stmt) > case GIMPLE_ASM: > return false; > > + case GIMPLE_TRANSACTION: > + return verify_gimple_transaction (stmt); > + Not here. > /* Tuples that do not have tree operands. */ > case GIMPLE_NOP: > case GIMPLE_PREDICT: > @@ -4271,10 +4329,19 @@ verify_gimple_in_seq_2 (gimple_seq stmts > err |= verify_gimple_in_seq_2 (gimple_eh_filter_failure (stmt)); > break; > > + case GIMPLE_EH_ELSE: > + err |= verify_gimple_in_seq_2 (gimple_eh_else_n_body (stmt)); > + err |= verify_gimple_in_seq_2 (gimple_eh_else_e_body (stmt)); > + break; > + > case GIMPLE_CATCH: > err |= verify_gimple_in_seq_2 (gimple_catch_handler (stmt)); > break; > > + case GIMPLE_TRANSACTION: > + err |= verify_gimple_in_seq_2 (gimple_transaction_body (stmt)); > + break; > + Ah, you do. But you'll never call your label verification code. > default: > { > bool err2 = verify_gimple_stmt (stmt); > @@ -5052,6 +5119,14 @@ gimple_redirect_edge_and_branch (edge e, > redirect_eh_dispatch_edge (stmt, e, dest); > break; > > + case GIMPLE_TRANSACTION: > + /* The ABORT edge has a stored label associated with it, otherwise > + the edges are simply redirectable. */ > + /* ??? We don't really need this label after the cfg is created. */ > + if (e->flags == 0) > + gimple_transaction_set_label (stmt, gimple_block_label (dest)); So why set it (and thus keep it live)? > + break; > + > default: > /* Otherwise it must be a fallthru edge, and we don't need to > do anything besides redirecting it. */ > @@ -6428,8 +6503,10 @@ dump_function_to_file (tree fn, FILE *fi > bool ignore_topmost_bind = false, any_var = false; > basic_block bb; > tree chain; > + bool tmclone = TREE_CODE (fn) == FUNCTION_DECL && DECL_IS_TM_CLONE (fn); > > - fprintf (file, "%s (", lang_hooks.decl_printable_name (fn, 2)); > + fprintf (file, "%s %s(", lang_hooks.decl_printable_name (fn, 2), > + tmclone ? 
"[tm-clone] " : ""); > > arg = DECL_ARGUMENTS (fn); > while (arg) > Index: gcc/passes.c > =================================================================== > --- gcc/passes.c (.../trunk) (revision 180744) > +++ gcc/passes.c (.../branches/transactional-memory) (revision > 180773) > @@ -1174,9 +1174,11 @@ init_optimization_passes (void) > p = &all_lowering_passes; > NEXT_PASS (pass_warn_unused_result); > NEXT_PASS (pass_diagnose_omp_blocks); > + NEXT_PASS (pass_diagnose_tm_blocks); > NEXT_PASS (pass_mudflap_1); > NEXT_PASS (pass_lower_omp); > NEXT_PASS (pass_lower_cf); > + NEXT_PASS (pass_lower_tm); > NEXT_PASS (pass_refactor_eh); > NEXT_PASS (pass_lower_eh); > NEXT_PASS (pass_build_cfg); > @@ -1241,6 +1243,7 @@ init_optimization_passes (void) > } > NEXT_PASS (pass_ipa_increase_alignment); > NEXT_PASS (pass_ipa_matrix_reorg); > + NEXT_PASS (pass_ipa_tm); > NEXT_PASS (pass_ipa_lower_emutls); > *p = NULL; > > @@ -1400,6 +1403,13 @@ init_optimization_passes (void) > NEXT_PASS (pass_uncprop); > NEXT_PASS (pass_local_pure_const); > } > + NEXT_PASS (pass_tm_init); > + { > + struct opt_pass **p = &pass_tm_init.pass.sub; > + NEXT_PASS (pass_tm_mark); > + NEXT_PASS (pass_tm_memopt); > + NEXT_PASS (pass_tm_edges); > + } > NEXT_PASS (pass_lower_complex_O0); > NEXT_PASS (pass_cleanup_eh); > NEXT_PASS (pass_lower_resx); > Index: gcc/reg-notes.def > =================================================================== > --- gcc/reg-notes.def (.../trunk) (revision 180744) > +++ gcc/reg-notes.def (.../branches/transactional-memory) (revision > 180773) > @@ -203,6 +203,11 @@ REG_NOTE (CROSSING_JUMP) > functions that can return twice. */ > REG_NOTE (SETJMP) > > +/* This kind of note is generated at each transactional memory > + builtin, to indicate we need to generate transaction restart > + edges for this insn. */ > +REG_NOTE (TM) > + > /* Indicates the cumulative offset of the stack pointer accounting > for pushed arguments. This will only be generated when > ACCUMULATE_OUTGOING_ARGS is false. */ > Index: gcc/cfgrtl.c > =================================================================== > --- gcc/cfgrtl.c (.../trunk) (revision 180744) > +++ gcc/cfgrtl.c (.../branches/transactional-memory) (revision > 180773) > @@ -2246,6 +2246,8 @@ purge_dead_edges (basic_block bb) > ; > else if ((e->flags & EDGE_EH) && can_throw_internal (insn)) > ; > + else if (flag_tm && find_reg_note (insn, REG_TM, NULL)) > + ; > else > remove = true; > } > Index: gcc/params.def > =================================================================== > --- gcc/params.def (.../trunk) (revision 180744) > +++ gcc/params.def (.../branches/transactional-memory) (revision > 180773) > @@ -872,6 +872,13 @@ DEFPARAM (PARAM_IPA_SRA_PTR_GROWTH_FACTO > "a pointer to an aggregate with", > 2, 0, 0) > > +DEFPARAM (PARAM_TM_MAX_AGGREGATE_SIZE, > + "tm-max-aggregate-size", > + "Size in bytes after which thread-local aggregates should be " > + "instrumented with the logging functions instead of save/restore " > + "pairs", > + 9, 0, 0) > + > DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE, > "ipa-cp-value-list-size", > "Maximum size of a list of values associated with each parameter > for " >