Hi, this patch finishes the transition of IPA passes to be per-function (and thus to behave sanely at LTO time when different flags are mixed). It also goes through common.opt and fixes the attributes of quite a few flags:
fauto-inc-dec, fdelete-dead-exceptions, ffunction-cse, fgraphite, fstrict-volatile-bitfields, fira-algorithm, fira-region, fira-share-save-slots, fira-share-spill-slots, fmodulo-sched-allow-regmoves, fpartial-inlining, fsched-stalled-insns, fsched-stalled-insns-dep, fstrict-overflow, ftracer, ftree-parallelize-loops, fassociative-math, freciprocal-math, fvect-cost-model, fsimd-cost-model, flag_stack_reuse are all function-local properties and thus should be marked as Optimization, while fauto-profile, fcommon, fdata-sections, fipa-icf-variables, ftoplevel-reorder, funit-at-a-time, fwhole-program currently won't work in optimization attributes (and won't get properly maintained to LTO). This is because they either affect variables that are not annotated or are decided globally for the whole compilation unit. There are a few cases I still need to look into: flag-function-sections - Here we can support the optimization attribute but currently don't - basically it needs revisiting the few flag_function_sections uses and making them use opt_for_fn. flag_gnu_tm, flag_gnu_unique - Those probably can be turned into Optimization but I have no clue. flag_profile_reorder_functions - can be handled but isn't at the moment. flag_split_stack - this one needs some unit finalization. flag_stack_protector - Probably can be supported as Optimization. flag_strict_aliasing - This is marked as Optimization, but I believe it does not work that way because alias classes are assigned to types that get shared. It would be great to support this since a lot of real code is mixing the settings. flag_toplevel_reorder - This can be supported but isn't. Similarly to flag_profile_reorder_functions, it needs a bit of tweaking of lto and cgraphunit. fp_contract_mode - This probably should be Optimization, but doing so makes the awk machinery fail. Bootstrapped/regtested x86_64-linux, will commit it later today if there are no complaints. 
Honza * ipa-reference.c (ipa_reference_get_not_read_global, ipa_reference_get_not_written_global): Do nothing if ipa-reference is disabled. (propagate_bits): If ipa-reference is disabled, do not look into local properties. (analyze_function): Disable analysis when ipa-reference is disabled. (generate_summary): Do not dump when ipa-reference is disabled. (get_read_write_all_from_node): When ipa-reference is disabled, use the node flags. (gate): Enable for LTO. * optc-save-gen.awk: Handle optimize_debug correctly. * opth-gen.awk: Likewise. * common.opt (fauto-inc-dec, fdelete-dead-exceptions, ffunction-cse, fgraphite, fstrict-volatile-bitfields, fira-algorithm, fira-region, fira-share-save-slots, fira-share-spill-slots, fmodulo-sched-allow-regmoves, fpartial-inlining, fsched-stalled-insns, fsched-stalled-insns-dep, fstrict-overflow, ftracer, ftree-parallelize-loops, fassociative-math, freciprocal-math, fvect-cost-model, fsimd-cost-model): Mark as Optimization. (fauto-profile, fcommon, fdata-sections, fipa-icf-variables, ftoplevel-reorder, funit-at-a-time, fwhole-program): Do not mark as Optimization. * ipa-icf.c (gate, sem_item_optimizer::filter_removed_items): Fix for IPA. 
Index: ipa-reference.c =================================================================== --- ipa-reference.c (revision 219756) +++ ipa-reference.c (working copy) @@ -198,6 +198,9 @@ set_reference_optimization_summary (stru bitmap ipa_reference_get_not_read_global (struct cgraph_node *fn) { + if (!opt_for_fn (fn->decl, flag_ipa_reference) + || !opt_for_fn (current_function_decl, flag_ipa_reference)) + return NULL; ipa_reference_optimization_summary_t info = get_reference_optimization_summary (fn->function_symbol (NULL)); if (info) @@ -216,6 +219,9 @@ ipa_reference_get_not_read_global (struc bitmap ipa_reference_get_not_written_global (struct cgraph_node *fn) { + if (!opt_for_fn (fn->decl, flag_ipa_reference) + || !opt_for_fn (current_function_decl, flag_ipa_reference)) + return NULL; ipa_reference_optimization_summary_t info = get_reference_optimization_summary (fn); if (info) @@ -381,8 +387,9 @@ propagate_bits (ipa_reference_global_var /* Only look into nodes we can propagate something. 
*/ int flags = flags_from_decl_or_type (y->decl); - if (avail > AVAIL_INTERPOSABLE - || (avail == AVAIL_INTERPOSABLE && (flags & ECF_LEAF))) + if (opt_for_fn (y->decl, flag_ipa_reference) + && (avail > AVAIL_INTERPOSABLE + || (avail == AVAIL_INTERPOSABLE && (flags & ECF_LEAF)))) { if (get_reference_vars_info (y)) { @@ -474,6 +481,8 @@ analyze_function (struct cgraph_node *fn int i; tree var; + if (!opt_for_fn (fn->decl, flag_ipa_reference)) + return; local = init_function_info (fn); for (i = 0; fn->iterate_reference (i, ref); i++) { @@ -570,7 +579,8 @@ generate_summary (void) if (dump_file) FOR_EACH_DEFINED_FUNCTION (node) - if (node->get_availability () >= AVAIL_INTERPOSABLE) + if (node->get_availability () >= AVAIL_INTERPOSABLE + && opt_for_fn (node->decl, flag_ipa_reference)) { ipa_reference_local_vars_info_t l; unsigned int index; @@ -607,7 +617,7 @@ read_write_all_from_decl (struct cgraph_ tree decl = node->decl; int flags = flags_from_decl_or_type (decl); if ((flags & ECF_LEAF) - && node->get_availability () <= AVAIL_INTERPOSABLE) + && node->get_availability () < AVAIL_INTERPOSABLE) ; else if (flags & ECF_CONST) ; @@ -640,7 +650,8 @@ get_read_write_all_from_node (struct cgr struct cgraph_edge *e, *ie; /* When function is overwritable, we can not assume anything. 
*/ - if (node->get_availability () <= AVAIL_INTERPOSABLE) + if (node->get_availability () <= AVAIL_INTERPOSABLE + || (node->analyzed && !opt_for_fn (node->decl, flag_ipa_reference))) read_write_all_from_decl (node, read_all, write_all); for (e = node->callees; @@ -650,7 +661,8 @@ get_read_write_all_from_node (struct cgr enum availability avail; struct cgraph_node *callee = e->callee->function_symbol (&avail); gcc_checking_assert (callee); - if (avail <= AVAIL_INTERPOSABLE) + if (avail <= AVAIL_INTERPOSABLE + || (callee->analyzed && !opt_for_fn (callee->decl, flag_ipa_reference))) read_write_all_from_decl (callee, read_all, write_all); } @@ -1178,7 +1190,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return (flag_ipa_reference + return ((in_lto_p || flag_ipa_reference) /* Don't bother doing anything if the program has errors. */ && !seen_error ()); } Index: optc-save-gen.awk =================================================================== --- optc-save-gen.awk (revision 219756) +++ optc-save-gen.awk (working copy) @@ -84,15 +84,17 @@ print "void"; print "cl_optimization_save (struct cl_optimization *ptr, struct gcc_options *opts)"; print "{"; -n_opt_char = 2; +n_opt_char = 3; n_opt_short = 0; n_opt_int = 0; n_opt_enum = 1; n_opt_other = 0; var_opt_char[0] = "optimize"; var_opt_char[1] = "optimize_size"; +var_opt_char[2] = "optimize_debug"; var_opt_range["optimize"] = "0, 255"; -var_opt_range["optimize_size"] = "0, 255"; +var_opt_range["optimize_size"] = "0, 1"; +var_opt_range["optimize_debug"] = "0, 1"; var_opt_enum[0] = "flag_fp_contract_mode"; # Sort by size to mimic how the structure is laid out to be friendlier to the @@ -734,11 +736,13 @@ for (i = 0; i < n_target_val; i++) { print "}"; -n_opt_val = 2; +n_opt_val = 3; var_opt_val[0] = "x_optimize" var_opt_val_type[0] = "char " var_opt_val[1] = "x_optimize_size" +var_opt_val[2] = "x_optimize_debug" var_opt_val_type[1] = "char " +var_opt_val_type[2] = "char " for (i = 0; i < n_opts; 
i++) { if (flag_set_p("Optimization", flags[i])) { name = var_name(flags[i]) Index: opth-gen.awk =================================================================== --- opth-gen.awk (revision 219756) +++ opth-gen.awk (working copy) @@ -132,13 +132,14 @@ print "/* Structure to save/restore opti print "struct GTY(()) cl_optimization"; print "{"; -n_opt_char = 2; +n_opt_char = 3; n_opt_short = 0; n_opt_int = 0; n_opt_enum = 1; n_opt_other = 0; var_opt_char[0] = "unsigned char x_optimize"; var_opt_char[1] = "unsigned char x_optimize_size"; +var_opt_char[2] = "unsigned char x_optimize_debug"; var_opt_enum[0] = "enum fp_contract_mode x_flag_fp_contract_mode"; for (i = 0; i < n_opts; i++) { Index: common.opt =================================================================== --- common.opt (revision 219756) +++ common.opt (working copy) @@ -912,11 +912,11 @@ Common Report Var(flag_asynchronous_unwi Generate unwind tables that are exact at each instruction boundary fauto-inc-dec -Common Report Var(flag_auto_inc_dec) Init(1) +Common Report Var(flag_auto_inc_dec) Init(1) Optimization Generate auto-inc/dec instructions fauto-profile -Common Report Var(flag_auto_profile) Optimization +Common Report Var(flag_auto_profile) Use sample profile information for call graph node weights. The default profile file is fbdata.afdo in 'pwd'. @@ -981,7 +981,7 @@ Common Report Var(flag_combine_stack_adj Looks for opportunities to reduce stack adjustments and stack references. 
fcommon -Common Report Var(flag_no_common,0) Optimization +Common Report Var(flag_no_common,0) Do not put uninitialized globals in the common section fcompare-debug @@ -1029,7 +1029,7 @@ Common Report Var(flag_cx_fortran_rules) Complex multiplication and division follow Fortran rules fdata-sections -Common Report Var(flag_data_sections) Optimization +Common Report Var(flag_data_sections) Place data items into their own section fdbg-cnt-list @@ -1059,7 +1059,7 @@ Common Report Var(flag_delayed_branch) O Attempt to fill delay slots of branch instructions fdelete-dead-exceptions -Common Report Var(flag_delete_dead_exceptions) Init(0) +Common Report Var(flag_delete_dead_exceptions) Init(0) Optimization Delete dead instructions that may throw exceptions fdelete-null-pointer-checks @@ -1268,7 +1268,7 @@ Enum(fp_contract_mode) String(fast) Valu ; Used for compiling the Unix kernel, where strange substitutions are ; done on the assembly output. ffunction-cse -Common Report Var(flag_no_function_cse,0) +Common Report Var(flag_no_function_cse,0) Optimization Allow function addresses to be held in registers ffunction-sections @@ -1299,7 +1299,7 @@ has finished ; This option is not documented yet as its semantics will change. 
fgraphite -Common Report Var(flag_graphite) +Common Report Var(flag_graphite) Optimization Enable in and out of Graphite representation fgraphite-identity @@ -1348,7 +1348,7 @@ Common Report Var(flag_loop_optimize_isl Enable the ISL based loop nest optimizer fstrict-volatile-bitfields -Common Report Var(flag_strict_volatile_bitfields) Init(-1) +Common Report Var(flag_strict_volatile_bitfields) Init(-1) Optimization Force bitfield accesses to match their type width fguess-branch-probability @@ -1477,7 +1477,7 @@ Common Report Var(flag_ipa_icf_functions Perform Identical Code Folding for functions fipa-icf-variables -Common Report Var(flag_ipa_icf_variables) Optimization +Common Report Var(flag_ipa_icf_variables) Perform Identical Code Folding for variables fipa-reference @@ -1493,7 +1493,7 @@ Common Ignore Does nothing. Preserved for backward compatibility. fira-algorithm= -Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm) Init(IRA_ALGORITHM_CB) +Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm) Init(IRA_ALGORITHM_CB) Optimization -fira-algorithm=[CB|priority] Set the used IRA algorithm Enum @@ -1506,7 +1506,7 @@ EnumValue Enum(ira_algorithm) String(priority) Value(IRA_ALGORITHM_PRIORITY) fira-region= -Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) Init(IRA_REGION_AUTODETECT) +Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) Init(IRA_REGION_AUTODETECT) Optimization -fira-region=[one|all|mixed] Set regions for IRA Enum @@ -1532,11 +1532,11 @@ Use IRA based register pressure calculat in RTL loop optimizations. fira-share-save-slots -Common Report Var(flag_ira_share_save_slots) Init(1) +Common Report Var(flag_ira_share_save_slots) Init(1) Optimization Share slots for saving different hard registers. 
fira-share-spill-slots -Common Report Var(flag_ira_share_spill_slots) Init(1) +Common Report Var(flag_ira_share_spill_slots) Init(1) Optimization Share stack slots for spilled pseudo-registers. fira-verbose= @@ -1658,7 +1658,7 @@ Common Report Var(flag_modulo_sched) Opt Perform SMS based modulo scheduling before the first scheduling pass fmodulo-sched-allow-regmoves -Common Report Var(flag_modulo_sched_allow_regmoves) +Common Report Var(flag_modulo_sched_allow_regmoves) Optimization Perform SMS based modulo scheduling with register moves allowed fmove-loop-invariants @@ -1719,7 +1719,7 @@ Common Report Var(flag_optimize_sibling_ Optimize sibling and tail recursive calls fpartial-inlining -Common Report Var(flag_partial_inlining) +Common Report Var(flag_partial_inlining) Optimization Perform partial inlining fpre-ipa-mem-report @@ -1966,7 +1966,7 @@ Common Report Var(flag_sched_stalled_ins Allow premature scheduling of queued insns fsched-stalled-insns= -Common RejectNegative Joined UInteger +Common RejectNegative Joined UInteger Optimization -fsched-stalled-insns=<number> Set number of queued insns that can be prematurely scheduled ; sched_stalled_insns_dep controls how many recently scheduled cycles will @@ -1978,7 +1978,7 @@ Common Report Var(flag_sched_stalled_ins Set dependence distance checking in premature scheduling of queued insns fsched-stalled-insns-dep= -Common RejectNegative Joined UInteger +Common RejectNegative Joined UInteger Optimization -fsched-stalled-insns-dep=<number> Set dependence distance checking in premature scheduling of queued insns fsched-group-heuristic @@ -2114,7 +2114,7 @@ Common Report Var(flag_strict_aliasing) Assume strict aliasing rules apply fstrict-overflow -Common Report Var(flag_strict_overflow) +Common Report Var(flag_strict_overflow) Optimization Treat signed overflow as undefined fsync-libcalls @@ -2157,11 +2157,11 @@ EnumValue Enum(tls_model) String(local-exec) Value(TLS_MODEL_LOCAL_EXEC) ftoplevel-reorder -Common Report 
Var(flag_toplevel_reorder) Init(2) Optimization +Common Report Var(flag_toplevel_reorder) Init(2) Reorder top level functions, variables, and asms ftracer -Common Report Var(flag_tracer) +Common Report Var(flag_tracer) Optimization Perform superblock formation via tail duplication ; Zero means that floating-point math operations cannot generate a @@ -2285,7 +2285,7 @@ Common Report Var(flag_tree_loop_optimiz Enable loop optimizations on tree level ftree-parallelize-loops= -Common Report Joined RejectNegative UInteger Var(flag_tree_parallelize_loops) Init(1) +Common Report Joined RejectNegative UInteger Var(flag_tree_parallelize_loops) Init(1) Optimization Enable automatic parallelization of loops ftree-phiprop @@ -2337,7 +2337,7 @@ Common Report Var(flag_tree_vrp) Init(0) Perform Value Range Propagation on trees funit-at-a-time -Common Report Var(flag_unit_at_a_time) Init(1) Optimization +Common Report Var(flag_unit_at_a_time) Init(1) Compile whole compilation unit at a time funroll-loops @@ -2356,12 +2356,12 @@ Common Report Var(flag_unsafe_loop_optim Allow loop optimizations to assume that the loops behave in normal way fassociative-math -Common Report Var(flag_associative_math) SetByCombined +Common Report Var(flag_associative_math) SetByCombined Optimization Allow optimization for floating-point arithmetic which may change the result of the operation due to rounding. freciprocal-math -Common Report Var(flag_reciprocal_math) SetByCombined +Common Report Var(flag_reciprocal_math) SetByCombined Optimization Same as -fassociative-math for expressions which include division. 
; Nonzero means that unsafe floating-point math optimizations are allowed @@ -2439,11 +2439,11 @@ Common Report Var(flag_tree_slp_vectoriz Enable basic block vectorization (SLP) on trees fvect-cost-model= -Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization Specifies the cost model for vectorization fsimd-cost-model= -Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) +Common Joined RejectNegative Enum(vect_cost_model) Var(flag_simd_cost_model) Init(VECT_COST_MODEL_UNLIMITED) Optimization Specifies the vectorization cost model for code marked with a simd directive Enum @@ -2540,7 +2540,7 @@ Common Report Var(flag_tree_builtin_call Enable conditional dead code elimination for builtin calls fwhole-program -Common Report Var(flag_whole_program) Init(0) Optimization +Common Report Var(flag_whole_program) Init(0) Perform whole program optimizations fwrapv Index: ipa-icf.c =================================================================== --- ipa-icf.c (revision 219756) +++ ipa-icf.c (working copy) @@ -1652,7 +1652,8 @@ sem_item_optimizer::filter_removed_items { sem_item *item = m_items[i]; - if (!flag_ipa_icf_functions && item->type == FUNC) + if (item->type == FUNC + && !opt_for_fn (item->node->decl, flag_ipa_icf_functions)) { remove_item (item); continue; @@ -2499,7 +2500,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return flag_ipa_icf_variables || flag_ipa_icf_functions; + return in_lto_p || flag_ipa_icf_variables || flag_ipa_icf_functions; } virtual unsigned int execute (function *)