Re: [PATCH, rtl-optimization]: Fix PR60851, ICE: in extract_constrain_insn_cached, at recog.c
On Wed, Mar 18, 2015 at 08:00:33PM +0100, Uros Bizjak wrote: > 2015-03-18 Uros Bizjak > > PR rtl-optimization/60851 > * recog.c (constrain_operands): Accept a pseudo register before reload > for LRA enabled targets. > > --- recog.c (revision 221493) > +++ recog.c (working copy) > @@ -2775,6 +2775,10 @@ constrain_operands (int strict, alternative_mask a > /* Before reload, accept what reload can turn > into mem. */ > || (strict < 0 && CONSTANT_P (op)) > +/* Before reload, accept a pseudo, > + since LRA can turn it into mem. */ > +|| (targetm.lra_p () && strict < 0 && REG_P (op) > +&& REGNO (op) >= FIRST_PSEUDO_REGISTER) > /* During reload, accept a pseudo */ > || (reload_in_progress && REG_P (op) > && REGNO (op) >= FIRST_PSEUDO_REGISTER))) This looks reasonable to me, but please give Vlad an extra day to comment on it. As the two adjacent conditions are mostly the same, perhaps it might be better written as || ((/* Before reload, accept a pseudo, since LRA can turn it into a mem. (targetm.lra_p () && strict < 0) /* During reload, accept a pseudo. */ || reload_in_progress) && REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER))) or put REG_P && REGNO checks first and only then test when. For 4.9 backport, please wait a few days after it goes into the trunk. Jakub
Re: [PATCH] Teach ipa-split about TSAN_FUNC_EXIT (PR sanitizer/65400)
On March 18, 2015 11:36:09 PM GMT+01:00, Jakub Jelinek wrote: >Hi! > >TSAN_FUNC_EXIT internal function is special, because we drop it during >inlining, so for fnsplit we need to be careful about it, otherwise we >can >end up with unbalanced pairs of tsan entry/exit marker functions. > >This patch gives up unless it is one of the two most common cases with >-fsanitize=thread - either TSAN_FUNC_EXIT in the return bb, which is >handled >as accepting it as return bb despite the TSAN_FUNC_EXIT call - >duplicating >the TSAN_FUNC_EXIT is exactly the right thing to do in that case, we >want it >in the *.part.* function and also in the caller, if the former is later >inlined into the latter again, the TSAN_FUNC_EXIT of the former is >dropped. >And another case that we handle is if there is TSAN_FUNC_EXIT on the >predecessors of return_bb - we can handle that case easily too by >duplicating TSAN_FUNC_EXIT after the *.part.* call. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Thanks, Richard. >2015-03-18 Jakub Jelinek > > PR sanitizer/65400 > * ipa-split.c (find_return_bb): Allow TSAN_FUNC_EXIT internal > call in the return bb. > (find_split_points): Add RETURN_BB argument, don't call > find_return_bb. > (split_function): Likewise. Add ADD_TSAN_FUNC_EXIT argument, > if true append TSAN_FUNC_EXIT internal call after the call to > the split off function. > (execute_split_functions): Call find_return_bb here. > Don't optimize if TSAN_FUNC_EXIT is found in unexpected places. > Adjust find_split_points and split_function calls. > > * c-c++-common/tsan/pr65400-1.c: New test. > * c-c++-common/tsan/pr65400-2.c: New test. > >--- gcc/ipa-split.c.jj 2015-03-17 18:10:11.0 +0100 >+++ gcc/ipa-split.c2015-03-18 17:12:45.017773858 +0100 >@@ -769,7 +769,8 @@ consider_split (struct split_point *curr >of the form: > = tmp_var; >return >- but return_bb can not be more complex than this. >+ but return_bb can not be more complex than this (except for >+ -fsanitize=thread we allow TSAN_FUNC_EXIT () internal call in >there). >If nothing is found, return the exit block. > > When there are multiple RETURN statement, chose one with return value, >@@ -814,6 +815,13 @@ find_return_bb (void) > found_return = true; > retval = gimple_return_retval (return_stmt); > } >+ /* For -fsanitize=thread, allow also TSAN_FUNC_EXIT () in the >return >+ bb. */ >+ else if ((flag_sanitize & SANITIZE_THREAD) >+ && is_gimple_call (stmt) >+ && gimple_call_internal_p (stmt) >+ && gimple_call_internal_fn (stmt) == IFN_TSAN_FUNC_EXIT) >+ ; > else > break; > } >@@ -1074,12 +1082,11 @@ typedef struct >the component used by consider_split. */ > > static void >-find_split_points (int overall_time, int overall_size) >+find_split_points (basic_block return_bb, int overall_time, int >overall_size) > { > stack_entry first; > vec stack = vNULL; > basic_block bb; >- basic_block return_bb = find_return_bb (); > struct split_point current; > > current.header_time = overall_time; >@@ -1236,19 +1243,20 @@ insert_bndret_call_after (tree retbnd, t > gimple_call_set_lhs (bndret, retbnd); > gsi_insert_after (gsi, bndret, GSI_CONTINUE_LINKING); > } >+ > /* Split function at SPLIT_POINT. */ > > static void >-split_function (struct split_point *split_point) >+split_function (basic_block return_bb, struct split_point >*split_point, >+ bool add_tsan_func_exit) > { > vec args_to_pass = vNULL; > bitmap args_to_skip; > tree parm; > int num = 0; >cgraph_node *node, *cur_node = cgraph_node::get >(current_function_decl); >- basic_block return_bb = find_return_bb (); > basic_block call_bb; >- gcall *call; >+ gcall *call, *tsan_func_exit_call = NULL; > edge e; > edge_iterator ei; > tree retval = NULL, real_retval = NULL, retbnd = NULL; >@@ -1534,11 +1542,18 @@ split_function (struct split_point *spli > || DECL_BY_REFERENCE (DECL_RESULT (current_function_decl > gimple_call_set_return_slot_opt (call, true); > >+ if (add_tsan_func_exit) >+tsan_func_exit_call = gimple_build_call_internal >(IFN_TSAN_FUNC_EXIT, 0); >+ > /* Update return value. This is bit tricky. When we do not return, > do nothing. When we return we might need to update return_bb > or produce a new return statement. */ > if (!split_part_return_p) >-gsi_insert_after (&gsi, call, GSI_NEW_STMT); >+{ >+ gsi_insert_after (&gsi, call, GSI_NEW_STMT); >+ if (tsan_func_exit_call) >+ gsi_insert_after (&gsi, tsan_func_exit_call, GSI_NEW_STMT); >+} > else > { > e = make_edge (call_bb, return_bb, >@@ -1642,6 +1657,8 @@ split_function (struct split_point *spli > } > else > gsi_insert_after (&gsi, call, GSI_NEW_STMT); >+if (tsan_func_exit_ca
Re: [CHKP, PATCH] Fix instrumented indirect calls with propagated pointers
On 12 Mar 13:09, Ilya Enkovich wrote: > Hi, > > Instrumented function pointer may be propagated into not instrumented > indirect call and vice versa. It requires additional call modifications > (either remove bounds or change callee). Bootstrapped and tested on > x86_64-unknown-linux-gnu. OK for trunk? > > Here is an updated version where we don't remove bounds args in case all args are to be removed anyway. Otherwise new statement is created and EH is not cleaned for it in redirect_all_calls. New test is added. Bootstrapped and tested on x86_64-unknown-linux-gnu. OK for trunk? Thanks, Ilya -- gcc/ 2015-03-19 Ilya Enkovich * cgraph.c (cgraph_edge::redirect_call_stmt_to_callee): Add redirection for instrumented calls. * tree-chkp.h (chkp_copy_call_skip_bounds): New. (chkp_redirect_edge): New. * tree-chkp.c (chkp_copy_call_skip_bounds): New. (chkp_redirect_edge): New. gcc/testsuite/ 2015-03-19 Ilya Enkovich * gcc.target/i386/mpx/chkp-fix-calls-1.c: New. * gcc.target/i386/mpx/chkp-fix-calls-2.c: New. * gcc.target/i386/mpx/chkp-fix-calls-3.c: New. diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 5ca1901..a0b0465 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1278,14 +1278,25 @@ cgraph_edge::redirect_call_stmt_to_callee (void) { cgraph_edge *e = this; - tree decl = gimple_call_fndecl (e->call_stmt); - tree lhs = gimple_call_lhs (e->call_stmt); + tree decl; + tree lhs; gcall *new_stmt; gimple_stmt_iterator gsi; + bool skip_bounds = false; #ifdef ENABLE_CHECKING cgraph_node *node; #endif + /* We might propagate instrumented function pointer into + not instrumented function and vice versa. In such a + case we need to either fix function declaration or + remove bounds from call statement. */ + if (callee) +skip_bounds = chkp_redirect_edge (e); + + decl = gimple_call_fndecl (e->call_stmt); + lhs = gimple_call_lhs (e->call_stmt); + if (e->speculative) { cgraph_edge *e2; @@ -1391,7 +1402,8 @@ cgraph_edge::redirect_call_stmt_to_callee (void) } if (e->indirect_unknown_callee - || decl == e->callee->decl) + || (decl == e->callee->decl + && !skip_bounds)) return e->call_stmt; #ifdef ENABLE_CHECKING @@ -1416,13 +1428,19 @@ cgraph_edge::redirect_call_stmt_to_callee (void) } } - if (e->callee->clone.combined_args_to_skip) + if (e->callee->clone.combined_args_to_skip + || skip_bounds) { int lp_nr; - new_stmt - = gimple_call_copy_skip_args (e->call_stmt, - e->callee->clone.combined_args_to_skip); + new_stmt = e->call_stmt; + if (e->callee->clone.combined_args_to_skip) + new_stmt + = gimple_call_copy_skip_args (new_stmt, + e->callee->clone.combined_args_to_skip); + if (skip_bounds) + new_stmt = chkp_copy_call_skip_bounds (new_stmt); + gimple_call_set_fndecl (new_stmt, e->callee->decl); gimple_call_set_fntype (new_stmt, gimple_call_fntype (e->call_stmt)); diff --git a/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-1.c b/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-1.c new file mode 100644 index 000..cb4d229 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fcheck-pointer-bounds -mmpx" } */ + +#include "math.h" + +double +test1 (double x, double y, double (*fn)(double, double)) +{ + return fn (x, y); +} + +double +test2 (double x, double y) +{ + return test1 (x, y, copysign); +} diff --git a/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-2.c b/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-2.c new file mode 100644 index 000..951e7de --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fcheck-pointer-bounds -mmpx -fno-inline" } */ + +#include "math.h" + +double +test1 (double x, double y, double (*fn)(double, double)) +{ + return fn (x, y); +} + +double +test2 (double x, double y) +{ + return test1 (x, y, copysign); +} diff --git a/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-3.c b/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-3.c new file mode 100755 index 000..439f631 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mpx/chkp-fix-calls-3.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fexceptions -fcheck-pointer-bounds -mmpx" } */ + +extern int f2 (const char*, int, ...); +extern long int f3 (int *); +extern void err (void) __attribute__((__error__("error"))); + +extern __inline __attribute__ ((__always_inline__)) int +f1 (int i, ...) +{ + if (__builtin_constant_p (i)) +{ + if (i) + err (); + return f2 ("", i); +} + + return f2 ("", i); +} + +int +test () +{ + int i; + + if (f1 (0)) +if (f3 (&i)) + i = 0
Re: [CHKP, PATCH] Fix LTO cgraph merge for instrumented functions
On 12 Mar 13:27, Ilya Enkovich wrote: > Hi, > > Currently cgraph merge has several issues with instrumented code: > - original function node may be removed => no assembler name conflict is > detected between function and variable > - only orig_decl name is privatized for instrumented function => node still > shares assembler name which causes infinite privatization loop > - information about changed name is stored in file_data of instrumented node > => original section name may be not found for original function > - chkp reference is not fixed when nodes are merged > > This patch should fix theese problems by keeping instrumentation thunks > reachable, privatizing both nodes and fixing chkp references. Bootstrapped > and tested on x86_64-unknown-linux-gnu. OK for trunk? > > > Thanks, > Ilya Here is an updated version where chkp_maybe_fix_chkp_ref also removes duplicating references which may appear during cgraph nodes merge. Bootstrapped and tested on x86_64-unknown-linux-gnu. OK for trunk? Thanks, Ilya -- gcc/ 2015-03-19 Ilya Enkovich * ipa-chkp.h (chkp_maybe_fix_chkp_ref): New. * ipa-chkp.c (chkp_maybe_fix_chkp_ref): New. * ipa.c (symbol_table::remove_unreachable_nodes): Don't remove instumentation thunks calling reachable functions. * lto-cgraph.c: Include ipa-chkp.h. (input_symtab): Fix chkp references for boundary nodes. * lto/lto-partition.c (privatize_symbol_name_1): New. (privatize_symbol_name): Privatize both decl and orig_decl names for instrumented functions. * lto/lto-symtab.c: Include ipa-chkp.h. (lto_cgraph_replace_node): Fix chkp references for merged function nodes. gcc/testsuite/ 2015-03-19 Ilya Enkovich * gcc.dg/lto/chkp-privatize-1_0.c: New. * gcc.dg/lto/chkp-privatize-1_1.c: New. * gcc.dg/lto/chkp-privatize-2_0.c: New. * gcc.dg/lto/chkp-privatize-2_1.c: New. diff --git a/gcc/ipa-chkp.c b/gcc/ipa-chkp.c index 0b857ff..7a7fc3c 100644 --- a/gcc/ipa-chkp.c +++ b/gcc/ipa-chkp.c @@ -414,6 +414,40 @@ chkp_instrumentable_p (tree fndecl) && (!fn || !copy_forbidden (fn, fndecl))); } +/* Check NODE has a correct IPA_REF_CHKP reference. + Create a new reference if required. */ + +void +chkp_maybe_fix_chkp_ref (cgraph_node *node) +{ + /* Firstly check node needs IPA_REF_CHKP. */ + if (node->instrumentation_clone + || !node->instrumented_version) +return; + + /* Check we already have a proper IPA_REF_CHKP. + Remove incorrect refs. */ + int i; + ipa_ref *ref = NULL; + bool found = false; + for (i = 0; node->iterate_reference (i, ref); i++) +if (ref->use == IPA_REF_CHKP) + { + /* Found proper reference. */ + if (ref->referred == node->instrumented_version + && !found) + found = true; + else + { + ref->remove_reference (); + i--; + } + } + + if (!found) +node->create_reference (node->instrumented_version, IPA_REF_CHKP, NULL); +} + /* Return clone created for instrumentation of NODE or NULL. */ cgraph_node * diff --git a/gcc/ipa-chkp.h b/gcc/ipa-chkp.h index 6708fe9..5fa7d88 100644 --- a/gcc/ipa-chkp.h +++ b/gcc/ipa-chkp.h @@ -24,5 +24,6 @@ extern tree chkp_copy_function_type_adding_bounds (tree orig_type); extern tree chkp_maybe_clone_builtin_fndecl (tree fndecl); extern cgraph_node *chkp_maybe_create_clone (tree fndecl); extern bool chkp_instrumentable_p (tree fndecl); +extern void chkp_maybe_fix_chkp_ref (cgraph_node *node); #endif /* GCC_IPA_CHKP_H */ diff --git a/gcc/ipa.c b/gcc/ipa.c index b3752de..3054afe 100644 --- a/gcc/ipa.c +++ b/gcc/ipa.c @@ -492,7 +492,22 @@ symbol_table::remove_unreachable_nodes (FILE *file) } else if (cnode->thunk.thunk_p) enqueue_node (cnode->callees->callee, &first, &reachable); - + + /* For instrumentation clones we always need original +function node for proper LTO privatization. */ + if (cnode->instrumentation_clone + && reachable.contains (cnode) + && cnode->definition) + { + gcc_assert (cnode->instrumented_version || in_lto_p); + if (cnode->instrumented_version) + { + enqueue_node (cnode->instrumented_version, &first, + &reachable); + reachable.add (cnode->instrumented_version); + } + } + /* If any reachable function has simd clones, mark them as reachable as well. */ if (cnode->simd_clones) diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index c875fed..b9196eb 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -80,6 +80,7 @@ along with GCC; see the file COPYING3. If not see #include "pass_manager.h" #include "ipa-utils.h" #include "omp-low.h" +#include "ipa-chkp.h" /* True when asm nodes ha
Re: [PATCH, rtl-optimization]: Fix PR60851, ICE: in extract_constrain_insn_cached, at recog.c
On Thu, Mar 19, 2015 at 8:44 AM, Jakub Jelinek wrote: > On Wed, Mar 18, 2015 at 08:00:33PM +0100, Uros Bizjak wrote: >> 2015-03-18 Uros Bizjak >> >> PR rtl-optimization/60851 >> * recog.c (constrain_operands): Accept a pseudo register before reload >> for LRA enabled targets. > As the two adjacent conditions are mostly the same, perhaps it might be > better written as >|| ((/* Before reload, accept a pseudo, since >LRA can turn it into a mem. > (targetm.lra_p () && strict < 0) > /* During reload, accept a pseudo. */ > || reload_in_progress) >&& REG_P (op) >&& REGNO (op) >= FIRST_PSEUDO_REGISTER))) > > or put REG_P && REGNO checks first and only then test when. Yeah, I thought about this particular CSE a bit. But since these are two conceptually different conditions, and the formatting (and comments) just didn't fit into available space, I wrote it this way. IMO, it is more readable, better follows the intended logic, and avoids even more indents. Also, I am pretty sure that any decent compiler can CSE this part on its own. However, the condition can be slightly improved by rewriting it to: /* Before reload, accept a pseudo, since LRA can turn it into a mem. */ || (strict < 0 && targetm.lra_p () && REG_P (op) && REGNO (op) >= FIRST_PSEUDO_REGISTER) so, we have cheaper tests in the front to shortcut more expensive tests later. > For 4.9 backport, please wait a few days after it goes into the trunk. Thanks! Uros.
Re: [PATCH, stage1] Make parloops gate more strict
On Wed, Mar 18, 2015 at 6:02 PM, Tom de Vries wrote: > On 18-03-15 12:18, Richard Biener wrote: >> >> On Wed, Mar 18, 2015 at 12:03 PM, Tom de Vries >> wrote: >>> >>> On 18-03-15 11:16, Richard Biener wrote: On Fri, Mar 13, 2015 at 4:28 PM, Tom de Vries wrote: > > > On 13-03-15 13:36, Richard Biener wrote: >> >> >> >> On Fri, Mar 13, 2015 at 1:07 PM, Jakub Jelinek >> wrote: >>> >>> >>> >>> On Fri, Mar 13, 2015 at 01:04:57PM +0100, Richard Biener wrote: Not really (I don't like -fdump-passes ...), but we need to make sure that -fdump-passes doesn't crash (because it runs very early and with cfun == NULL I think) >>> >>> >>> >>> >>> If it runs with cfun == NULL, then supposedly the gates that are >>> dependent >>> on current function should for -fdump-passes purposes also return >>> true >>> if cfun == NULL (well, of course do all the unconditional checks). >>> Though of course, with optimize/target attributes this is harder, as >>> different functions can use different options. >> >> >> >> >> Yes, one reason why I think -fdump-passes is just broken >> implementation-wise. >> > > Atm fdump-passes doesn't run with cfun == NULL. > > From pass_manager::dump_passes: > ... > FOR_EACH_FUNCTION (n) > if (DECL_STRUCT_FUNCTION (n->decl)) > { > node = n; > break; > } > > if (!node) > return; > > push_cfun (DECL_STRUCT_FUNCTION (node->decl)); Um - this now picks a random function which may be one with an optimize or target attribute associated to it. >>> >>> Indeed. >>> >>> Attached patch removes that code, and runs the gates with cfun == NULL >>> for >>> -fdump-passes. It at least builds, and allows us to compile >>> src/gcc/testsuite/gcc.dg/dump-pass.c with -O2 -fdump-passes. >>> >>> Should I bootstrap and reg-test, or do you see a problem with this >>> approach? >> >> >> Yeah - it makes the -fdump-passes "hack" more pervasive throughout >> the compiler. >> >> I suppose it should instead build & push a "dummy" sturct function. >> > > Like this? Looks good to me. Richard. >> Well, or simply don't care for it's brokeness. > > > I'm afraid leaving it broken just means we'll keep coming back to it. So I'd > prefer either fixing or removing. > > Thanks, > - Tom >
[PATCH, CHKP] Fix references for instrumented aliases
Hi, Currently when we clone alias we also clone its target and create alias reference during alias target cloning. This doesn't work in case alias clone is removed and later is re-created. This patch moves alias reference creation to fix that. Bootstrapped and tested on x86_64-unknown-linux-gnu. Will apply after prerequisite patch (https://gcc.gnu.org/ml/gcc-patches/2015-03/msg00992.html) is applied. Thanks, Ilya -- gcc/ 2015-03-19 Ilya Enkovich * ipa-chkp.c (chkp_maybe_create_clone): Create alias reference when cloning alias node. gcc/testsuite/ 2015-03-19 Ilya Enkovich * gcc.dg/lto/chkp-removed-alias_0.c: New. diff --git a/gcc/ipa-chkp.c b/gcc/ipa-chkp.c index 0b857ff..bd3db5f 100644 --- a/gcc/ipa-chkp.c +++ b/gcc/ipa-chkp.c @@ -535,12 +535,7 @@ chkp_maybe_create_clone (tree fndecl) /* Clone all aliases. */ for (i = 0; node->iterate_direct_aliases (i, ref); i++) - { - struct cgraph_node *alias = dyn_cast (ref->referring); - struct cgraph_node *chkp_alias - = chkp_maybe_create_clone (alias->decl); - chkp_alias->create_reference (clone, IPA_REF_ALIAS, NULL); - } + chkp_maybe_create_clone (ref->referring->decl); /* Clone all thunks. */ for (e = node->callers; e; e = e->next_caller) @@ -563,7 +558,10 @@ chkp_maybe_create_clone (tree fndecl) ref = node->ref_list.first_reference (); if (ref) - chkp_maybe_create_clone (ref->referred->decl); + { + target = chkp_maybe_create_clone (ref->referred->decl); + clone->create_reference (target, IPA_REF_ALIAS); + } if (node->alias_target) { diff --git a/gcc/testsuite/gcc.dg/lto/chkp-removed-alias_0.c b/gcc/testsuite/gcc.dg/lto/chkp-removed-alias_0.c new file mode 100644 index 000..96d728d --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/chkp-removed-alias_0.c @@ -0,0 +1,28 @@ +/* { dg-lto-do link } */ +/* { dg-require-effective-target mpx } */ +/* { dg-lto-options { { -O2 -flto -flto-partition=max -fcheck-pointer-bounds -mmpx } } } */ + +int test1 (const char *c) +{ + return c[0] * 2; +} + +int test2 (const char *c) +{ + return c[1] * 3; +} + +int test1_alias (const char *c) __attribute__ ((alias ("test1"))); +int test2_alias (const char *c) __attribute__ ((alias ("test2"))); + +struct S +{ + int (*fnptr[2]) (const char *); +} S; + +struct S s = {test1_alias, test2_alias}; + +int main (int argc, const char **argv) +{ + return s.fnptr[argc] (argv[0]); +}
Re: Fix PR 65177: diamonds are not valid execution threads for jump threading
On Wed, Mar 18, 2015 at 11:35 PM, Sebastian Pop wrote: > Hi, > > the attached patch fixes PR 65177 in which the code generator of FSM jump > thread > would create a diamond on the copied path: see https://gcc.gnu.org/PR65177#c18 > for a detailed description. > > The patch is renaming SEME into jump_thread as the notion of SEME is more > general than the special case that we are interested in, in the case of > jump-threading: an execution thread to be jump-threaded has the property that > each node on the path has exactly one predecessor, disallowing any other > control flow like diamonds or back-edge loops within the SEME region. > > The patch passes regstrap on x86-64-linux, and fixes the make check of hmmer. > Ok to commit? I don't like the special-casing in copy_bbs (with bb_in_bbs doing a linear walk anyway...). Is the first test + /* When creating a jump-thread, we only redirect edges to +consecutive basic blocks. */ + if (i + 1 < n) + { + if (e->dest != bbs[i + 1]) + continue; not really always the case for jump threads? copy_bbs doesn't impose any restriction on ordering on bbs[], so it seems to be a speciality of the caller. + { + /* Do not jump back into the jump-thread path from the last +BB. */ + if (bb_in_bbs (e->dest, bbs, n - 1)) + continue; this one sounds easier to ensure in the caller as well - just remove the extra unwanted edge. That said - please instead fixup after copy_bbs in duplicate_seme_region. Richard. > Thanks, > Sebastian
[PATCH] Make wider use of "v" constraint in i386.md
Hi, There were some discussion about "x" constraints being too conservative for some patterns in i386.md. Patch below fixes it. This is probably stage1 material. ChangeLog: gcc/ 2015-03-19 Ilya Tocar * config/i386/i386.h (EXT_SSE_REG_P): New. * config/i386/i386.md (*cmpi_mixed): Use "v" constraint. (*cmpi_sse): Ditto. (*movxi_internal_avx512f): Ditto. (define_split): Check for xmm16+, when splitting scalar float_extend. (*extendsfdf2_mixed): Use "v" constraint. (*extendsfdf2_sse): Ditto. (define_split): Check for xmm16+, when splitting scalar float_truncate. (*truncdfsf_fast_sse): Use "v" constraint. (fix_trunc_sse): Ditto. (*float2_sse): Ditto. (define_peephole2): Check for xmm16+, when converting scalar float_truncate. (define_peephole2): Check for xmm16+, when converting scalar float_extend. (*fop__comm_mixed): Use "v" constraint. (*fop__comm_sse): Ditto. (*fop__1_mixed): Ditto. (*sqrt2_sse): Ditto. (*ieee_s3): Ditto. --- gcc/config/i386/i386.h | 2 ++ gcc/config/i386/i386.md | 82 +++-- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 1e755d3..0b8c57a 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1477,6 +1477,8 @@ enum reg_class #define REX_SSE_REGNO_P(N) \ IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG) +#define EXT_SSE_REG_P(X) (REG_P (X) && EXT_REX_SSE_REGNO_P (REGNO (X))) + #define EXT_REX_SSE_REGNO_P(N) \ IN_RANGE ((N), FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1129b93..38eaf95 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1639,8 +1639,8 @@ (define_insn "*cmpi_mixed" [(set (reg:FPCMP FLAGS_REG) (compare:FPCMP - (match_operand:MODEF 0 "register_operand" "f,x") - (match_operand:MODEF 1 "nonimmediate_operand" "f,xm")))] + (match_operand:MODEF 0 "register_operand" "f,v") + (match_operand:MODEF 1 "nonimmediate_operand" "f,vm")))] "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)" "* return output_fp_compare (insn, operands, true, @@ -1666,8 +1666,8 @@ (define_insn "*cmpi_sse" [(set (reg:FPCMP FLAGS_REG) (compare:FPCMP - (match_operand:MODEF 0 "register_operand" "x") - (match_operand:MODEF 1 "nonimmediate_operand" "xm")))] + (match_operand:MODEF 0 "register_operand" "v") + (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] "TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)" "* return output_fp_compare (insn, operands, true, @@ -1959,8 +1959,8 @@ (set_attr "length_immediate" "1")]) (define_insn "*movxi_internal_avx512f" - [(set (match_operand:XI 0 "nonimmediate_operand" "=x,x ,m") - (match_operand:XI 1 "vector_move_operand" "C ,xm,x"))] + [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m") + (match_operand:XI 1 "vector_move_operand" "C ,vm,v"))] "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (which_alternative) @@ -4003,7 +4003,9 @@ (match_operand:SF 1 "nonimmediate_operand")))] "TARGET_USE_VECTOR_FP_CONVERTS && optimize_insn_for_speed_p () - && reload_completed && SSE_REG_P (operands[0])" + && reload_completed && SSE_REG_P (operands[0]) + && (!EXT_SSE_REG_P (operands[0]) + || TARGET_AVX512VL)" [(set (match_dup 2) (float_extend:V2DF (vec_select:V2SF @@ -4048,9 +4050,9 @@ "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));") (define_insn "*extendsfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,v") (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))] + (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] "TARGET_SSE2 && TARGET_MIX_SSE_I387" { switch (which_alternative) @@ -4071,8 +4073,8 @@ (set_attr "mode" "SF,XF,DF")]) (define_insn "*extendsfdf2_sse" - [(set (match_operand:DF 0 "nonimmediate_operand" "=x") -(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=v") +(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "vm")))] "TARGET_SSE2 && TARGET_SSE_MATH" "%vcvtss2sd\t{%1, %d0|%d0, %1}" [(set_attr "type" "ssecvt") @@ -4155,7 +4157,9 @@ (match_operand:DF 1 "nonimmediate_operand")))] "TARGET_USE_VECTOR_FP_CONVERTS && optimize_insn_for_speed_p () - && reload_completed && SSE_REG_P (operands[0])" + && reload_completed && SSE_REG_P (operands[0]) + && (!EXT_SSE_REG_P (operands[0]) + || TARGET_AVX512VL)" [(set (match_dup 2) (vec_concat:V4SF (float_truncate:
[PATCH] Another -fsanitize=thread fix (PR sanitizer/65400)
Hi! This patch fixes the other part of the PR. On this testcase, ICF creates a thunk and sets gimple_call_set_tail true on the call in there. No TSAN_FUNC_EXIT internal call is added, which isn't a big deal, tsan pass has code to add it if there are none. But, nothing was clearing the tail call flag when this was done, and with -fsanitize=thread in sanitized functions all functions that call other functions must start with the __tsan_func_entry call and call __tsan_func_exit at the end (unless they are noreturn). Thus, we can only tail call __tsan_func_exit in instrumented functions. The patch clears the tail call flag on all calls but TSAN_FUNC_EXIT (for which instrument_gimple isn't called at all). Regtested on x86_64-linux, ok for trunk? 2015-03-19 Bernd Edlinger Jakub Jelinek PR sanitizer/65400 * tsan.c (instrument_gimple): Clear tail call flag on calls. * c-c++-common/tsan/pr65400-3.c: New test. --- gcc/tsan.c.jj 2015-01-19 09:31:24.0 +0100 +++ gcc/tsan.c 2015-03-19 09:51:39.086804965 +0100 @@ -680,6 +680,10 @@ instrument_gimple (gimple_stmt_iterator && (gimple_call_fndecl (stmt) != builtin_decl_implicit (BUILT_IN_TSAN_INIT))) { + /* All functions with function call will have exit instrumented, +therefore no function calls other than __tsan_func_exit +shall appear in the functions. */ + gimple_call_set_tail (as_a (stmt), false); if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) instrument_builtin_call (gsi); return true; --- gcc/testsuite/c-c++-common/tsan/pr65400-3.c.jj 2015-03-19 10:07:15.453302478 +0100 +++ gcc/testsuite/c-c++-common/tsan/pr65400-3.c 2015-03-19 10:27:11.941515580 +0100 @@ -0,0 +1,75 @@ +/* PR sanitizer/65400 */ +/* { dg-shouldfail "tsan" } */ +/* { dg-additional-options "-fno-omit-frame-pointer -ldl" } */ + +#include +#include "tsan_barrier.h" + +static pthread_barrier_t barrier; +int v; + +int +fn1 (int a, int b, int c) +{ + int r = (a ^ b) % c; + r = r * a * b + c; + r = (r ^ b) % c; + return r; +} + +int +fn2 (int a, int b, int c) +{ + int r = (a ^ b) % c; + r = r * a * b + c; + r = (r ^ b) % c; + return r; +} + +__attribute__((noinline, noclone)) void +foo (void) +{ + barrier_wait (&barrier); + barrier_wait (&barrier); + v++; +} + +__attribute__((noinline, noclone)) void +bar (void) +{ + int (*fna) (int, int, int); + int (*fnb) (int, int, int); + int i; + asm ("" : "=g" (fna) : "0" (fn1)); + asm ("" : "=g" (fnb) : "0" (fn2)); + for (i = 0; i < 128; i++) +{ + fna (0, 0, i + 1); + fnb (0, 0, i + 1); +} + foo (); +} + +__attribute__((noinline, noclone)) void * +tf (void *arg) +{ + (void) arg; + bar (); + return NULL; +} + +int +main () +{ + pthread_t th; + barrier_init (&barrier, 2); + if (pthread_create (&th, NULL, tf, NULL)) +return 0; + barrier_wait (&barrier); + v++; + barrier_wait (&barrier); + pthread_join (th, NULL); + return 0; +} + +/* { dg-output "WARNING: ThreadSanitizer: data race.*#2 _?tf" } */ Jakub
Re: [PATCH] Another -fsanitize=thread fix (PR sanitizer/65400)
On Thu, 19 Mar 2015, Jakub Jelinek wrote: > Hi! > > This patch fixes the other part of the PR. On this testcase, ICF creates > a thunk and sets gimple_call_set_tail true on the call in there. No > TSAN_FUNC_EXIT internal call is added, which isn't a big deal, tsan pass > has code to add it if there are none. But, nothing was clearing the tail > call flag when this was done, and with -fsanitize=thread in sanitized > functions all functions that call other functions must start with the > __tsan_func_entry call and call __tsan_func_exit at the end (unless they are > noreturn). Thus, we can only tail call __tsan_func_exit in instrumented > functions. The patch clears the tail call flag on all calls but > TSAN_FUNC_EXIT (for which instrument_gimple isn't called at all). > > Regtested on x86_64-linux, ok for trunk? Ok. Thanks, Richard. > 2015-03-19 Bernd Edlinger > Jakub Jelinek > > PR sanitizer/65400 > * tsan.c (instrument_gimple): Clear tail call flag on > calls. > > * c-c++-common/tsan/pr65400-3.c: New test. > > --- gcc/tsan.c.jj 2015-01-19 09:31:24.0 +0100 > +++ gcc/tsan.c2015-03-19 09:51:39.086804965 +0100 > @@ -680,6 +680,10 @@ instrument_gimple (gimple_stmt_iterator >&& (gimple_call_fndecl (stmt) > != builtin_decl_implicit (BUILT_IN_TSAN_INIT))) > { > + /* All functions with function call will have exit instrumented, > + therefore no function calls other than __tsan_func_exit > + shall appear in the functions. */ > + gimple_call_set_tail (as_a (stmt), false); >if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) > instrument_builtin_call (gsi); >return true; > --- gcc/testsuite/c-c++-common/tsan/pr65400-3.c.jj2015-03-19 > 10:07:15.453302478 +0100 > +++ gcc/testsuite/c-c++-common/tsan/pr65400-3.c 2015-03-19 > 10:27:11.941515580 +0100 > @@ -0,0 +1,75 @@ > +/* PR sanitizer/65400 */ > +/* { dg-shouldfail "tsan" } */ > +/* { dg-additional-options "-fno-omit-frame-pointer -ldl" } */ > + > +#include > +#include "tsan_barrier.h" > + > +static pthread_barrier_t barrier; > +int v; > + > +int > +fn1 (int a, int b, int c) > +{ > + int r = (a ^ b) % c; > + r = r * a * b + c; > + r = (r ^ b) % c; > + return r; > +} > + > +int > +fn2 (int a, int b, int c) > +{ > + int r = (a ^ b) % c; > + r = r * a * b + c; > + r = (r ^ b) % c; > + return r; > +} > + > +__attribute__((noinline, noclone)) void > +foo (void) > +{ > + barrier_wait (&barrier); > + barrier_wait (&barrier); > + v++; > +} > + > +__attribute__((noinline, noclone)) void > +bar (void) > +{ > + int (*fna) (int, int, int); > + int (*fnb) (int, int, int); > + int i; > + asm ("" : "=g" (fna) : "0" (fn1)); > + asm ("" : "=g" (fnb) : "0" (fn2)); > + for (i = 0; i < 128; i++) > +{ > + fna (0, 0, i + 1); > + fnb (0, 0, i + 1); > +} > + foo (); > +} > + > +__attribute__((noinline, noclone)) void * > +tf (void *arg) > +{ > + (void) arg; > + bar (); > + return NULL; > +} > + > +int > +main () > +{ > + pthread_t th; > + barrier_init (&barrier, 2); > + if (pthread_create (&th, NULL, tf, NULL)) > +return 0; > + barrier_wait (&barrier); > + v++; > + barrier_wait (&barrier); > + pthread_join (th, NULL); > + return 0; > +} > + > +/* { dg-output "WARNING: ThreadSanitizer: data race.*#2 _?tf" } */ > > Jakub > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Jennifer Guild, Dilip Upmanyu, Graham Norton HRB 21284 (AG Nuernberg)
Re: [PATCH][RFC] Fix PR63155
On Wed, 18 Mar 2015, Richard Biener wrote: > On March 18, 2015 4:59:30 PM GMT+01:00, Alan Lawrence > wrote: > >Following this patch (r221318), we're seeing what appears to be a > >miscompile of > >glibc on AArch64. This causes quite a bunch of tests to fail, segfaults > >etc., if > >LD_LIBRARY_PATH leads to a libc.so.6 built with that patch vs without > >(same > >glibc sources). We are still working on a reduced testcase, but this is > >proving > >difficult... > > The patch was supposed to end up in the very same code generation. > Thus it's enough to identify the problematic source file and send me > preprocessed source and cc1 invocation so I can investigate with a > cross. Ok - I _think_ that live_track_process_def doesn't handle partitions with more than one member. It's too closely tied to SSA. Thus the whole idea of iterating the coalescing doesn't work without fixing that. Like conservatively with Index: gcc/tree-ssa-coalesce.c === --- gcc/tree-ssa-coalesce.c (revision 221490) +++ gcc/tree-ssa-coalesce.c (working copy) @@ -724,7 +724,10 @@ live_track_clear_var (live_track_p ptr, int p; p = var_to_partition (ptr->map, var); - if (p != NO_PARTITION) + if (p != NO_PARTITION + /* ??? If this partition has more than one member don't do anything. */ + && ptr->map->var_partition->elements[SSA_NAME_VERSION + (partition_to_var (ptr->map, p))].class_count == 1) live_track_remove_partition (ptr, p); } @@ -780,8 +783,11 @@ live_track_process_def (live_track_p ptr if (p == NO_PARTITION) return; - /* Clear the liveness bit. */ - live_track_remove_partition (ptr, p); + /* Clear the liveness bit. + ??? If this partition has more than one member we can't do that. */ + if (ptr->map->var_partition->elements[SSA_NAME_VERSION + (partition_to_var (ptr->map, p))].class_count == 1) +live_track_remove_partition (ptr, p); /* If the bitmap isn't empty now, conflicts need to be added. */ root = basevar_index (ptr->map, p); @@ -789,7 +795,8 @@ live_track_process_def (live_track_p ptr { b = ptr->live_base_partitions[root]; EXECUTE_IF_SET_IN_BITMAP (b, 0, x, bi) -ssa_conflicts_add (graph, p, x); + if (x != (unsigned) p) + ssa_conflicts_add (graph, p, x); } } Does that fix it? Given the issue I'll revert the patch. Thanks, Richard. > Thanks, > Richard. > > >--Alan > > > >Richard Biener wrote: > >> On Mon, 9 Mar 2015, Richard Biener wrote: > >> > >>> On Fri, 6 Mar 2015, Jeff Law wrote: > >>> > On 03/06/15 06:16, Richard Biener wrote: > > This fixes PR63155 and reduces the memory usage at -O0 from > >reported > > 10GB (couldn't verify/update on my small box) to 350MB (still > >worse > > compared to 4.8 which needs only 50MB). > > > > It fixes this by no longer computing live info or building a > >conflict > > graph for coalescing of SSA names flowing over abnormal edges > > (which needs to succeed). > > > > Of course this also removes verification that this coalescing is > >valid > > (but computing this has quadratic cost). With this it turns > > ICEs into miscompiles. > > > > We could restrict verifying that we can perform abnormal > >coalescing > > to ENABLE_CHECKING (and I've wanted a verifier pass that can > >verify > > this multiple times to be able to catch what breaks it and not > >having > > to work back from out-of-SSA ICEing...). > > > > So any opinion on this patch welcome. > > > > Bootstrap and regtest running on x86_64-unknown-linux-gnu. > > > > Ok for trunk? ;) > > > > Thanks, > > Richard. > > > > 2015-03-06 Richard Biener > > > > PR middle-end/63155 > > * tree-ssa-coalesce.c (attempt_coalesce): Handle graph being > >NULL. > > (coalesce_partitions): Split out abnormal coalescing to ... > > (perform_abnormal_coalescing): ... this function. > > (coalesce_ssa_name): Perform abnormal coalescing without > >computing > > live/conflict. > I'd personally like to keep the checking when ENABLE_CHECKING. > > I haven't followed this discussion real closely, but I wonder if > >some kind of > blocking approach would work without blowing up the memory > >consumption. > There's no inherent reason why we have to coalesce everything at > >the same > time. We can use a blocking factor and do coalescing on some N > >number of > SSA_NAMEs at a time. > >>> Yes, that's possible at (quite?) some compile-time cost. Note that > >we > >>> can't really guarantee that the resulting live/conflict problems > >shrink > >>> significantly enough without sorting the coalesces in a different > >way > >>> (not after important coalesces but after their basevars). > >>> > I suspect we can select an N that ultim
Re: [PATCH][2/3][PR65458] Mark omp thread functions as parallelized
On 18-03-15 18:25, Jakub Jelinek wrote: On Wed, Mar 18, 2015 at 06:21:51PM +0100, Tom de Vries wrote: this patch fixes PR65458. The patch marks omp thread functions as parallelized, which means the parloops pass no longer attempts to modify that function. Bootstrapped and reg-tested on x86_64. OK for stage4 trunk? This will certainly not work with LTO. IMHO you instead want some cgraph_node flag, and make sure you stream it out and in for LTO. Updated patch adds a parallelized_function flag to cgraph_node, and uses it instead of the bitmap parallelized_functions in tree-parloops.c. Bootstrapped and reg-tested on x86_64. OK for stage4 trunk? Thanks, - Tom Mark omp thread functions as parallelized 2015-03-19 Tom de Vries PR tree-optimization/65458 * cgraph.c (cgraph_node::dump): Handle parallelized_function field. * cgraph.h (cgraph_node): Add parallelized_function field. * lto-cgraph.c (lto_output_node): Write parallelized_function field. (input_overwrite_node): Read parallelized_function field. * omp-low.c: Add include of tree-parloops.h. (expand_omp_taskreg): Call mark_parallelized_function for child_fn. * tree-parloops.c: Add include of plugin-api.h, ipa-ref.h and cgraph.h. Remove include of gt-tree-parloops.h. (parallelized_functions): Remove static variable. (parallelized_function_p): Rewrite using parallelized_function field of cgraph_node. (mark_parallelized_function): New function. (create_loop_fn): Remove adding to parallelized_functions. * tree-parloops.h (mark_parallelized_function): Declare. --- gcc/cgraph.c| 2 ++ gcc/cgraph.h| 2 ++ gcc/lto-cgraph.c| 2 ++ gcc/omp-low.c | 2 ++ gcc/tree-parloops.c | 35 +-- gcc/tree-parloops.h | 1 + 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/gcc/cgraph.c b/gcc/cgraph.c index ede58bf..4573057 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -2009,6 +2009,8 @@ cgraph_node::dump (FILE *f) fprintf (f, " only_called_at_exit"); if (opt_for_fn (decl, optimize_size)) fprintf (f, " optimize_size"); + if (parallelized_function) +fprintf (f, " parallelized_function"); fprintf (f, "\n"); diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 52b15c5..650e689 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1317,6 +1317,8 @@ public: unsigned nonfreeing_fn : 1; /* True if there was multiple COMDAT bodies merged by lto-symtab. */ unsigned merged : 1; + /* True if function was created to be executed in parallel. */ + unsigned parallelized_function : 1; private: /* Worker for call_for_symbol_and_aliases. */ diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index c875fed..088de86 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -574,6 +574,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node, bp_pack_value (&bp, node->icf_merged, 1); bp_pack_value (&bp, node->nonfreeing_fn, 1); bp_pack_value (&bp, node->thunk.thunk_p, 1); + bp_pack_value (&bp, node->parallelized_function, 1); bp_pack_enum (&bp, ld_plugin_symbol_resolution, LDPR_NUM_KNOWN, node->resolution); bp_pack_value (&bp, node->instrumentation_clone, 1); @@ -1209,6 +1210,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data, node->icf_merged = bp_unpack_value (bp, 1); node->nonfreeing_fn = bp_unpack_value (bp, 1); node->thunk.thunk_p = bp_unpack_value (bp, 1); + node->parallelized_function = bp_unpack_value (bp, 1); node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution, LDPR_NUM_KNOWN); node->instrumentation_clone = bp_unpack_value (bp, 1); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 48d73cb..a49a6eb 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see #include "context.h" #include "lto-section-names.h" #include "gomp-constants.h" +#include "tree-parloops.h" /* Lowering of OMP parallel and workshare constructs proceeds in two @@ -5569,6 +5570,7 @@ expand_omp_taskreg (struct omp_region *region) /* Inform the callgraph about the new function. */ DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties; cgraph_node::add_new_function (child_fn, true); + mark_parallelized_function (child_fn); /* Fix the callgraph edges for child_cfun. Those for cfun will be fixed in a following pass. */ diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index a584460..e952f2b 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -75,6 +75,9 @@ along with GCC; see the file COPYING3. If not see #include "tree-parloops.h" #include "omp-low.h" #include "tree-nested.h" +#include "plugin-api.h" +#include "ipa-ref.h" +#include "cgraph.h" /* This pass tries to distribute iterations of loops into several threads. The implementation is straightforward -- for each loop we test whether its @@ -1422,21 +1425,24 @@ separate_decls_in_region
Re: [PATCH][3/3][PR65460] Mark offloaded functions as parallelized
On 18-03-15 18:22, Tom de Vries wrote: Hi, this patch fixes PR65460. The patch marks offloaded functions as parallelized, which means the parloops pass no longer attempts to modify that function. Updated patch to postpone mark_parallelized_function until the corresponding cgraph_node is available, to ensure it works with the updated mark_parallelized_function from patch 2/3. Bootstrapped and reg-tested on x86_64. OK for stage4 trunk? Thanks, - Tom Mark offloaded functions as parallelized 2015-03-18 Tom de Vries PR tree-optimization/65460 * omp-low.c (expand_omp_target): Call mark_parallelized_function for child_fn. --- gcc/omp-low.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index a49a6eb..7195aa3 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -8938,6 +8938,7 @@ expand_omp_target (struct omp_region *region) /* Inform the callgraph about the new function. */ DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties; cgraph_node::add_new_function (child_fn, true); + mark_parallelized_function (child_fn); #ifdef ENABLE_OFFLOADING /* Add the new function to the offload table. */ -- 1.9.1
[PATCH][libiberty] Avoid padding in partition_elem
The following patch re-orders elements to avoid 8 bytes of padding. Bootstrapped on x86_64-unknown-linux-gnu, ok? Thanks, Richard. 2015-03-19 Richard Biener * partition.h (struct partition_elem): Re-order elements to avoid padding. Index: include/partition.h === --- include/partition.h (revision 221490) +++ include/partition.h (working copy) @@ -45,12 +45,12 @@ extern "C" { struct partition_elem { - /* The canonical element that represents the class containing this - element. */ - int class_element; /* The next element in this class. Elements in each class form a circular list. */ struct partition_elem* next; + /* The canonical element that represents the class containing this + element. */ + int class_element; /* The number of elements in this class. Valid only if this is the canonical element for its class. */ unsigned class_count;
Re: [PATCH][2/3][PR65458] Mark omp thread functions as parallelized
On Thu, Mar 19, 2015 at 12:02:01PM +0100, Tom de Vries wrote: > +void > +mark_parallelized_function (tree fndecl) > +{ > + cgraph_node *node = cgraph_node::get (fndecl); > + gcc_assert (node != NULL); > + node->parallelized_function = 1; > } I'm not convinced we need this wrapper, I'd just use cgraph_node::get (fndecl)->parallelized_function = 1; wherever you need to set it. It wouldn't be the first or last flag handled this way. > @@ -1459,10 +1465,6 @@ create_loop_fn (location_t loc) >type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); > >decl = build_decl (loc, FUNCTION_DECL, name, type); > - if (!parallelized_functions) > -parallelized_functions = BITMAP_GGC_ALLOC (); > - bitmap_set_bit (parallelized_functions, DECL_UID (decl)); > - More importantly, you aren't marking the function as parallelized here. That most likely defeats the original purpose of the bitmap. Perhaps it is too early to create cgraph node here, but you should ensure that it is done perhaps later in create_loop_fn. Jakub
Re: [PATCH][libiberty] Avoid padding in partition_elem
On Thu, Mar 19, 2015 at 12:09:10PM +0100, Richard Biener wrote: > > The following patch re-orders elements to avoid 8 bytes of padding. > > Bootstrapped on x86_64-unknown-linux-gnu, ok? > > Thanks, > Richard. > > 2015-03-19 Richard Biener > > * partition.h (struct partition_elem): Re-order elements to > avoid padding. Ok, thanks. Jakub
Re: [PATCH][2/3][PR65458] Mark omp thread functions as parallelized
On 19-03-15 12:11, Jakub Jelinek wrote: On Thu, Mar 19, 2015 at 12:02:01PM +0100, Tom de Vries wrote: +void +mark_parallelized_function (tree fndecl) +{ + cgraph_node *node = cgraph_node::get (fndecl); + gcc_assert (node != NULL); + node->parallelized_function = 1; } I'm not convinced we need this wrapper, I'd just use cgraph_node::get (fndecl)->parallelized_function = 1; wherever you need to set it. It wouldn't be the first or last flag handled this way. Sure, I can update that, I'll retest and repost. @@ -1459,10 +1465,6 @@ create_loop_fn (location_t loc) type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); decl = build_decl (loc, FUNCTION_DECL, name, type); - if (!parallelized_functions) -parallelized_functions = BITMAP_GGC_ALLOC (); - bitmap_set_bit (parallelized_functions, DECL_UID (decl)); - More importantly, you aren't marking the function as parallelized here. That most likely defeats the original purpose of the bitmap. Perhaps it is too early to create cgraph node here, but you should ensure that it is done perhaps later in create_loop_fn. Indeed, it's not done here, but it is still done, only later. The function we create in parloops is split off using omp_expand, and that's where we mark the function as parallelized, just like other omp functions, in expand_omp_taskreg. Thanks, - Tom
Re: [PATCH][2/3][PR65458] Mark omp thread functions as parallelized
On Thu, Mar 19, 2015 at 12:27:04PM +0100, Tom de Vries wrote: > Sure, I can update that, I'll retest and repost. Yes, please. Probably the tree-parloops.h include will not be needed either then. > Indeed, it's not done here, but it is still done, only later. > > The function we create in parloops is split off using omp_expand, and that's > where we mark the function as parallelized, just like other omp functions, > in expand_omp_taskreg. Ah, you're right. Jakub
Re: [Patch, Fortran, PR 64787 a.o., v2] Invalid code on sourced allocation of class(*) character string
Hi Dominique, Hi all, please find attached a new version of the patch to fix pr64787 after processing Dominique's comments. Thank you very much for your work, Dominique. The patch now also fixes: pr63230 - allocation of deferred length character as derived type component causes internal compiler error pr51550 - ICE in gfc_get_derived_type, at fortran/trans-types.c:2401 (I believe the fortran code in the pr is not legal and fixed it; the fixed one now runs.) It partially fixes: pr55901 - [OOP] type is (character(len=*)) misinterpreted as array (The codes compile and run, but valgrind reports accesses to uninitialized memory; I am looking into this.) pr54070 - [4.8/4.9/5 Regression] Wrong code with allocatable deferred-length (array) function results (Compiles again (didn't with the first version of the patch for 64787), but still segfaults at runtime; -> agenda) This patch needs my previous patches as stated in: https://gcc.gnu.org/ml/fortran/2015-03/msg00076.html Bootstraps and regtests ok on x86_64-linux-gnu/F20. Reviews and comments welcome. Regards, Andre -- Andre Vehreschild * Email: vehre ad gmx dot de pr64787_v2.clog Description: Binary data diff --git a/gcc/fortran/class.c b/gcc/fortran/class.c index 786876c..455aa69 100644 --- a/gcc/fortran/class.c +++ b/gcc/fortran/class.c @@ -234,6 +234,9 @@ gfc_add_component_ref (gfc_expr *e, const char *name) } if (*tail != NULL && strcmp (name, "_data") == 0) next = *tail; + else +/* Avoid losing memory. */ +gfc_free_ref_list (*tail); (*tail) = gfc_get_ref(); (*tail)->next = next; (*tail)->type = REF_COMPONENT; @@ -2562,13 +2565,19 @@ find_intrinsic_vtab (gfc_typespec *ts) c->attr.access = ACCESS_PRIVATE; /* Build a minimal expression to make use of - target-memory.c/gfc_element_size for 'size'. */ + target-memory.c/gfc_element_size for 'size'. Special handling + for character arrays, that are not constant sized: to support + len(str)*kind, only the kind information is stored in the + vtab. */ e = gfc_get_expr (); e->ts = *ts; e->expr_type = EXPR_VARIABLE; c->initializer = gfc_get_int_expr (gfc_default_integer_kind, NULL, - (int)gfc_element_size (e)); + ts->type == BT_CHARACTER + && charlen == 0 ? + ts->kind : + (int)gfc_element_size (e)); gfc_free_expr (e); /* Add component _extends. */ diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index f55c691..f4fa9c8 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3168,6 +3168,7 @@ void gfc_add_component_ref (gfc_expr *, const char *); void gfc_add_class_array_ref (gfc_expr *); #define gfc_add_data_component(e) gfc_add_component_ref(e,"_data") #define gfc_add_vptr_component(e) gfc_add_component_ref(e,"_vptr") +#define gfc_add_len_component(e) gfc_add_component_ref(e,"_len") #define gfc_add_hash_component(e) gfc_add_component_ref(e,"_hash") #define gfc_add_size_component(e) gfc_add_component_ref(e,"_size") #define gfc_add_def_init_component(e) gfc_add_component_ref(e,"_def_init") diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index 54f8f4a..697a17a 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -4975,8 +4975,7 @@ static tree gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, gfc_expr ** lower, gfc_expr ** upper, stmtblock_t * pblock, stmtblock_t * descriptor_block, tree * overflow, - tree expr3_elem_size, tree *nelems, gfc_expr *expr3, - gfc_typespec *ts) + tree expr3_elem_size, tree *nelems, gfc_expr *expr3) { tree type; tree tmp; @@ -5002,7 +5001,7 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, /* Set the dtype. */ tmp = gfc_conv_descriptor_dtype (descriptor); - gfc_add_modify (descriptor_block, tmp, gfc_get_dtype (TREE_TYPE (descriptor))); + gfc_add_modify (descriptor_block, tmp, gfc_get_dtype (type)); or_expr = boolean_false_node; @@ -5156,9 +5155,6 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, tmp = TYPE_SIZE_UNIT (tmp); } } - else if (ts->type != BT_UNKNOWN && ts->type != BT_CHARACTER) -/* FIXME: Properly handle characters. See PR 57456. */ -tmp = TYPE_SIZE_UNIT (gfc_typenode_for_spec (ts)); else tmp = TYPE_SIZE_UNIT (gfc_get_element_type (type)); @@ -5230,7 +5226,7 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, bool gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, tree errlen, tree label_finish, tree expr3_elem_size, - tree *nelems, gfc_expr *expr3, gfc_typespec *ts) + tree *nelems, gfc_expr *expr3) { tree tmp; tree pointer; @@ -5315,7 +5311,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, size = gfc_array_init_size (se->expr, ref->u.ar.a
Re: [GOOGLE] Fixes for TLS wrapper and init functions in LIPO mode
On Wed, Mar 18, 2015 at 9:25 PM, Xinliang David Li wrote: > get_local_tls_init_fn can be called from other contexts other than > 'handle_tls_init' -- is the added new assertion safe ? In fact there is still an issue here, for file static TLS vars. Will work on a fix and send the original test case (forgot to include in first patch) and the new one with the fixed patch. Teresa > > David > > On Wed, Mar 18, 2015 at 9:18 PM, Teresa Johnson > wrote: >> >> Ensure TLS variable wrapper and init functions are recorded at >> the module scope upon creation so that they are cleared when >> popping the module scope in between modules in LIPO mode. >> >> Also, do not generate a local TLS init function (__tls_init) >> for aux functions, only in the primary modules. >> >> Passes regression tests. Ok for Google branches? >> Teresa >> >> 2015-03-18 Teresa Johnson >> >> Google ref b/19618364. >> * cp/decl2.c (get_local_tls_init_fn): Assert on aux module. >> (get_tls_init_fn): Record new global decl. >> (get_tls_wrapper_fn): Ditto. >> (handle_tls_init): Skip aux modules. >> >> Index: cp/decl2.c >> === >> --- cp/decl2.c (revision 221425) >> +++ cp/decl2.c (working copy) >> @@ -3036,6 +3036,9 @@ get_local_tls_init_fn (void) >>DECL_ARTIFICIAL (fn) = true; >>mark_used (fn); >>SET_IDENTIFIER_GLOBAL_VALUE (sname, fn); >> + /* In LIPO mode we should not generate local init functions for >> + the aux modules (see handle_tls_init). */ >> + gcc_assert (!L_IPO_IS_AUXILIARY_MODULE); >> } >>return fn; >> } >> @@ -3100,6 +3103,11 @@ get_tls_init_fn (tree var) >>DECL_BEFRIENDING_CLASSES (fn) = var; >> >>SET_IDENTIFIER_GLOBAL_VALUE (sname, fn); >> + /* In LIPO mode make sure we record the new global value so that it >> + is cleared before parsing the next aux module. */ >> + if (L_IPO_COMP_MODE && !is_parsing_done_p ()) >> +add_decl_to_current_module_scope (fn, >> + NAMESPACE_LEVEL >> (global_namespace)); >> } >>return fn; >> } >> @@ -3157,6 +3165,11 @@ get_tls_wrapper_fn (tree var) >>DECL_BEFRIENDING_CLASSES (fn) = var; >> >>SET_IDENTIFIER_GLOBAL_VALUE (sname, fn); >> + /* In LIPO mode make sure we record the new global value so that it >> + is cleared before parsing the next aux module. */ >> + if (L_IPO_COMP_MODE && !is_parsing_done_p ()) >> +add_decl_to_current_module_scope (fn, >> + NAMESPACE_LEVEL >> (global_namespace)); >> } >>return fn; >> } >> @@ -4179,6 +4192,14 @@ clear_decl_external (struct cgraph_node *node, voi >> static void >> handle_tls_init (void) >> { >> + /* In LIPO mode we should not generate local init functions for >> + aux modules. The wrapper will reference the variable's init function >> + that is defined in its own primary module. Otherwise there is >> + a name conflict between the primary and aux __tls_init functions, >> + and difficulty ensuring each TLS variable is initialized exactly >> once. */ >> + if (L_IPO_IS_AUXILIARY_MODULE) >> +return; >> + >>tree vars = prune_vars_needing_no_initialization (&tls_aggregates); >>if (vars == NULL_TREE) >> return; >> >> >> -- >> Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413 > > -- Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413
[PATCH][gcse] Use emit_move_insn rather than creating SET rtx and emitting that
Hi all, This patches fixes the ICE reported at https://gcc.gnu.org/ml/gcc-patches/2015-03/msg00949.html The problem is that gcse generates a (set (reg:OI) (const_int 0)) that doesn't match anything in the arm backend. That SET was created through processing an insn generated by the neon_movoi insn in neon.md. gcse created an OImode reg, put it in a SET rtx with const_int 0 and tried to recognise it which failed. As pointed out by James Greenhalgh offline the correct thing would have been to do an emit_move_insn to let the backend expanders do the right thing (especially in the concerned testcase gcc.c-torture/execute/pr65427.c that uses 256-bit vectors that arm doesn't support natively). Bootstrapped and tested on arm, x86, aarch64. This ICE doesn't happen with 4.9 and 4.8 so it's only a regression for GCC 5. The currently ICE'ins testcase passes now, so no new testcase is added. Ok for trunk? Thanks, Kyrill 2015-03-18 Kyrylo Tkachov James Greenhalgh * gcse.c (process_insert_insn): Use emit_move_insn rather than generating a SET rtx and emitting it.commit 9b6366d27deb2a366c7cfd308e02ab158527f430 Author: Kyrylo Tkachov Date: Wed Mar 18 16:05:36 2015 + [gcse] Use emit_move_insn rather than creating SET rtx and emitting that diff --git a/gcc/gcse.c b/gcc/gcse.c index e03b36c..cc55e91 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -2168,7 +2168,7 @@ process_insert_insn (struct gcse_expr *expr) insn will be recognized (this also adds any needed CLOBBERs). */ else { - rtx_insn *insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp)); + rtx_insn *insn = emit_move_insn (reg, exp); if (insn_invalid_p (insn, false)) gcc_unreachable ();
Re: [PATCH][gcse] Use emit_move_insn rather than creating SET rtx and emitting that
On Thu, Mar 19, 2015 at 01:45:19PM +, Kyrill Tkachov wrote: > Bootstrapped and tested on arm, x86, aarch64. > This ICE doesn't happen with 4.9 and 4.8 so it's only a regression for GCC > 5. > The currently ICE'ins testcase passes now, so no new testcase is added. Not an expert on this, but it looks wrong to me. emit_move_insn is used a few lines above, but only for the general_operand case, so it seems it was very much intentional that way. I bet emit_move_insn isn't prepared to handle arbitrary RTL expressions, so the general_operand check makes sense for it. As process_insert_insn supposed can't fail, perhaps something earlier should have figured out it would be invalid? Jakub
Re: [PATCH][gcse] Use emit_move_insn rather than creating SET rtx and emitting that
On Thu, Mar 19, 2015 at 2:45 PM, Kyrill Tkachov wrote: > As pointed out by James Greenhalgh offline the correct thing would have been > to do an > emit_move_insn to let the backend expanders do the right thing (especially > in the concerned > testcase gcc.c-torture/execute/pr65427.c that uses 256-bit vectors that arm > doesn't support > natively). This is supposed to be caught by want_to_gcse_p() via can_assign_to_reg_without_clobbers_p(). How does your expression get past that barrier? The gcc_unreachable() is there because all the code in gcse.c assumes it is OK to emit a SET-insn without going through emit_move_insn(). Ciao! Steven
[PATCH, CHKP, Committed] Don't try to clone instrumented thunks
Hi, When thunk is cloned, its callee is temporarily set to original thunks's target which is later also cloned. Thus we should be more careful when clonning node's thunks. Bootstrapped and tested on x86_64-unknown-linux-gnu. Applied to trunk. Thanks, Ilya -- 2015-03-19 Ilya Enkovich * ipa-chkp.c (chkp_maybe_create_clone): Don't try to clone instrumented thunks. diff --git a/gcc/ipa-chkp.c b/gcc/ipa-chkp.c index 3bea06a..a9933e2 100644 --- a/gcc/ipa-chkp.c +++ b/gcc/ipa-chkp.c @@ -592,7 +592,8 @@ chkp_maybe_create_clone (tree fndecl) /* Clone all thunks. */ for (e = node->callers; e; e = e->next_caller) if (e->caller->thunk.thunk_p - && !e->caller->thunk.add_pointer_bounds_args) + && !e->caller->thunk.add_pointer_bounds_args + && !e->caller->instrumentation_clone) { struct cgraph_node *thunk = chkp_maybe_create_clone (e->caller->decl);
[PATCH][expr.c] PR 65358 Avoid clobbering partial argument during sibcall
Hi all, This patch fixes PR 65358. For details look at the excellent write-up by Honggyu in bugzilla. The problem is that we're trying to pass a struct partially on the stack and partially in regs during a tail-call optimisation but the struct we're passing is also a partial incoming arg though the split between stack and regs is different from its outgoing usage. The emit_push_insn code ends up doing a block move for the on-stack part but ends up overwriting the part that needs to be loaded into regs. My first thought was to just load the regs part first and then do the stack part but that doesn't work as multiple comments in that function indicate (the block move being expanded to movmem or other functions being one of the reasons). My proposed solution is to detect when the overlap happens, find the overlapping region and load it before the stack pushing into pseudos and after the stack pushing is done move the overlapping values from the pseudos into the hard argument regs that they're supposed to go. That way this new functionality should only ever be triggered when there's the overlap in this PR (causing wrong-code) and shouldn't affect codegen anywhere else. Bootstrapped and tested on arm-none-linux-gnueabihf, aarch64-none-linux-gnu and x86_64-linux-gnu. According to the PR this appears at least as far back 4.6 so this isn't a regression on the release branches, but it is a wrong-code bug. I'll let Honggyu upstream the testcase separately (https://gcc.gnu.org/ml/gcc-patches/2015-03/msg00984.html) I'll be testing this on the 4.8 and 4.9 branches. Thoughts on this approach? Thanks, Kyrill 2015-03-19 Kyrylo Tkachov PR middle-end/65358 * expr.c (memory_load_overlap): New function. (emit_push_insn): When pushing partial args to the stack would clobber the register part load the overlapping part into a pseudo and put it into the hard reg after pushing.commit 490c5f2074d76a2927afaea99e4dd0bacccb413c Author: Kyrylo Tkachov Date: Wed Mar 18 13:42:37 2015 + [expr.c] PR 65358 Avoid clobbering partial argument during sibcall diff --git a/gcc/expr.c b/gcc/expr.c index dc13a14..d3b9156 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -4121,6 +4121,25 @@ emit_single_push_insn (machine_mode mode, rtx x, tree type) } #endif +/* Add SIZE to X and check whether it's greater than Y. + If it is, return the constant amount by which it's greater or smaller. + If the two are not statically comparable (for example, X and Y contain + different registers) return -1. This is used in expand_push_insn to + figure out if reading SIZE bytes from location X will end up reading from + location Y. */ + +static int +memory_load_overlap (rtx x, rtx y, HOST_WIDE_INT size) +{ + rtx tmp = plus_constant (Pmode, x, size); + rtx sub = simplify_gen_binary (MINUS, Pmode, tmp, y); + + if (!CONST_INT_P (sub)) +return -1; + + return INTVAL (sub); +} + /* Generate code to push X onto the stack, assuming it has mode MODE and type TYPE. MODE is redundant except when X is a CONST_INT (since they don't @@ -4179,6 +4198,10 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, xinner = x; + int nregs = partial / UNITS_PER_WORD; + rtx *tmp_regs = NULL; + int overlapping = 0; + if (mode == BLKmode || (STRICT_ALIGNMENT && align < GET_MODE_ALIGNMENT (mode))) { @@ -4309,6 +4332,35 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, PARM_BOUNDARY. Assume the caller isn't lying. */ set_mem_align (target, align); + /* If part should go in registers and pushing to that part would + overwrite some of the values that need to go into regs, load the + overlapping values into temporary pseudos to be moved into the hard + regs at the end after the stack pushing has completed. + We cannot load them directly into the hard regs here because + they can be clobbered by the block move expansions. + See PR 65358. */ + + if (partial > 0 && reg != 0 && mode == BLKmode + && GET_CODE (reg) != PARALLEL) + { + overlapping = memory_load_overlap (XEXP (x, 0), temp, partial); + if (overlapping > 0) + { + gcc_assert (overlapping % UNITS_PER_WORD == 0); + overlapping /= UNITS_PER_WORD; + + tmp_regs = XALLOCAVEC (rtx, overlapping); + + for (int i = 0; i < overlapping; i++) + tmp_regs[i] = gen_reg_rtx (word_mode); + + for (int i = 0; i < overlapping; i++) + emit_move_insn (tmp_regs[i], +operand_subword_force (target, i, mode)); + } + else + overlapping = 0; + } emit_block_move (target, xinner, size, BLOCK_OP_CALL_PARM); } } @@ -4411,9 +4463,8 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size, } } - /* If part should go in registers, copy that part - into the appropriate registers. Do this now, at the end, - since mem-to-mem copies above may do function calls. */ + /* Move the partial arg
Re: [PATCH][gcse] Use emit_move_insn rather than creating SET rtx and emitting that
On 19/03/15 13:56, Steven Bosscher wrote: On Thu, Mar 19, 2015 at 2:45 PM, Kyrill Tkachov wrote: As pointed out by James Greenhalgh offline the correct thing would have been to do an emit_move_insn to let the backend expanders do the right thing (especially in the concerned testcase gcc.c-torture/execute/pr65427.c that uses 256-bit vectors that arm doesn't support natively). This is supposed to be caught by want_to_gcse_p() via can_assign_to_reg_without_clobbers_p(). How does your expression get past that barrier? The gcc_unreachable() is there because all the code in gcse.c assumes it is OK to emit a SET-insn without going through emit_move_insn(). Thanks for the pointers, I was too hasty with that patch. I'm not very familiar with the gcse code so I'll dig around. Kyrill Ciao! Steven
Re: [PATCH][gcse] Use emit_move_insn rather than creating SET rtx and emitting that
On 19/03/15 13:56, Steven Bosscher wrote: On Thu, Mar 19, 2015 at 2:45 PM, Kyrill Tkachov wrote: As pointed out by James Greenhalgh offline the correct thing would have been to do an emit_move_insn to let the backend expanders do the right thing (especially in the concerned testcase gcc.c-torture/execute/pr65427.c that uses 256-bit vectors that arm doesn't support natively). This is supposed to be caught by want_to_gcse_p() via can_assign_to_reg_without_clobbers_p(). How does your expression get past that barrier? The gcc_unreachable() is there because all the code in gcse.c assumes it is OK to emit a SET-insn without going through emit_move_insn(). btw, the ICE happens during the hoist pass, rather than gcse. Kyrill Ciao! Steven
[Patch, Fortran, pr55901, v1] [OOP] type is (character(len=*)) misinterpreted as array
Hi all, please find attached the parts missing to stop valgrind's complaining about the use of uninitialized memory. The issue was, that when constructing a temporary class-object to call a routine with unlimited polymorphic arguments, the _len component was never set. This is fixed by this patch now. Note, the patch is based on all these preliminary patches: https://gcc.gnu.org/ml/fortran/2015-03/msg00074.html https://gcc.gnu.org/ml/fortran/2015-03/msg00075.html https://gcc.gnu.org/ml/fortran/2015-03/msg00085.html Bootstraps and regtests ok on x86_64-linux-gnu/F20. Please review! - Andre -- Andre Vehreschild * Email: vehre ad gmx dot de pr55901_v1.clog Description: Binary data diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index 7d3f3be..a30c391 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -578,6 +578,34 @@ gfc_conv_derived_to_class (gfc_se *parmse, gfc_expr *e, } } + if (class_ts.u.derived->components->ts.type == BT_DERIVED + && class_ts.u.derived->components->ts.u.derived + ->attr.unlimited_polymorphic) +{ + /* Take care about initializing the _len component correctly. */ + ctree = gfc_class_len_get (var); + if (UNLIMITED_POLY (e)) + { + gfc_expr *len; + gfc_se se; + + len = gfc_copy_expr (e); + gfc_add_len_component (len); + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, len); + if (optional) + tmp = build3_loc (input_location, COND_EXPR, TREE_TYPE (se.expr), + cond_optional, se.expr, + fold_convert (TREE_TYPE (se.expr), + integer_zero_node)); + else + tmp = se.expr; + } + else + tmp = integer_zero_node; + gfc_add_modify (&parmse->pre, ctree, fold_convert (TREE_TYPE (ctree), + tmp)); +} /* Pass the address of the class object. */ parmse->expr = gfc_build_addr_expr (NULL_TREE, var); @@ -736,44 +764,54 @@ gfc_conv_intrinsic_to_class (gfc_se *parmse, gfc_expr *e, } } - /* When the actual arg is a char array, then set the _len component of the - unlimited polymorphic entity, too. */ - if (e->ts.type == BT_CHARACTER) + gcc_assert (class_ts.type == BT_CLASS); + if (class_ts.u.derived->components->ts.type == BT_DERIVED + && class_ts.u.derived->components->ts.u.derived + ->attr.unlimited_polymorphic) { ctree = gfc_class_len_get (var); - /* Start with parmse->string_length because this seems to be set to a - correct value more often. */ - if (parmse->string_length) - gfc_add_modify (&parmse->pre, ctree, parmse->string_length); - /* When the string_length is not yet set, then try the backend_decl of - the cl. */ - else if (e->ts.u.cl->backend_decl) - gfc_add_modify (&parmse->pre, ctree, e->ts.u.cl->backend_decl); - /* If both of the above approaches fail, then try to generate an - expression from the input, which is only feasible currently, when the - expression can be evaluated to a constant one. */ - else -{ - /* Try to simplify the expression. */ - gfc_simplify_expr (e, 0); - if (e->expr_type == EXPR_CONSTANT && !e->ts.u.cl->resolved) - { - /* Amazingly all data is present to compute the length of a - constant string, but the expression is not yet there. */ - e->ts.u.cl->length = gfc_get_constant_expr (BT_INTEGER, 4, - &e->where); - mpz_set_ui (e->ts.u.cl->length->value.integer, - e->value.character.length); - gfc_conv_const_charlen (e->ts.u.cl); - e->ts.u.cl->resolved = 1; - gfc_add_modify (&parmse->pre, ctree, e->ts.u.cl->backend_decl); - } + /* When the actual arg is a char array, then set the _len component of the + unlimited polymorphic entity, too. */ + if (e->ts.type == BT_CHARACTER) + { + /* Start with parmse->string_length because this seems to be set to a + correct value more often. */ + if (parmse->string_length) + tmp = parmse->string_length; + /* When the string_length is not yet set, then try the backend_decl of + the cl. */ + else if (e->ts.u.cl->backend_decl) + tmp = e->ts.u.cl->backend_decl; + /* If both of the above approaches fail, then try to generate an + expression from the input, which is only feasible currently, when the + expression can be evaluated to a constant one. */ else { - gfc_error ("Can't compute the length of the char array at %L.", - &e->where); + /* Try to simplify the expression. */ + gfc_simplify_expr (e, 0); + if (e->expr_type == EXPR_CONSTANT && !e->ts.u.cl->resolved) + { + /* Amazingly all data is present to compute the length of a + constant string, but the expression is not yet there. */ + e->ts.u.cl->length = gfc_get_constant_expr (BT_INTEGER, 4, + &e->where); + mpz_set_ui (e->ts.u.cl->length->value.integer, + e->value.character.length); + gfc_conv_const_charlen (e->ts.u.cl); + e->ts.u.cl->resolved = 1; + tmp =
RE: [PATCH, FT32] initial support
Second ping. Also, have attached updated patchset for the current gcc. Thanks. -- James Bowman FTDI Open Source Liaison From: Joseph Myers [jos...@codesourcery.com] Sent: Tuesday, February 17, 2015 2:06 AM To: James Bowman Cc: gcc-patches@gcc.gnu.org Subject: RE: [PATCH, FT32] initial support On Mon, 16 Feb 2015, James Bowman wrote: > I have updated the target options. Space-saving is now enabled by > -Os. There is also a new option -msim to enable building for the > simulator (the simulator is pending submission to gdb-binutils). The documentation in this patch doesn't seem to have been updated for those changes. -- Joseph S. Myers jos...@codesourcery.com gcc-ft32.txt.gz Description: gcc-ft32.txt.gz
Re: [PATCH] Fix __has_{cpp_}attribute with -traditional-cpp (PR preprocessor/65238)
Hello Jakub, Jakub Jelinek writes: > __has_{cpp_,}attribute builtin macros are effectively function-like macros > taking one argument (and the ISO preprocessor expands macros in the argument > which is IMHO desirable), but the traditional preprocessor has been crashing > on them or reporting errors. > As the hook uses cpp_get_token and thus the ISO preprocessor, we need to set > up things such that the argument and ()s around it are already preprocessed > and ready to be reparsed by the ISO preprocessor (this is similar to how > e.g. #if/#elif and various other directives are handled). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2015-03-11 Jakub Jelinek > > PR preprocessor/65238 > * internal.h (_cpp_scan_out_logical_line): Add third argument. > * directives.c (prepare_directive_trad): Pass false to it. > * traditional.c (_cpp_read_logical_line_trad, > _cpp_create_trad_definition): Likewise. > (struct fun_macro): Add paramc field. > (fun_like_macro): New function. > (maybe_start_funlike): Handle NODE_BUILTIN macros. Initialize > macro->paramc field. > (save_argument): Use macro->paramc instead of > macro->node->value.macro->paramc. > (push_replacement_text): Formatting fix. > (recursive_macro): Use fun_like_macro helper. > (_cpp_scan_out_logical_line): Likewise. Add BUILTIN_MACRO_ARG > argument. Initialize fmacro.paramc field. Handle builtin > function-like macros. > > * c-c++-common/cpp/pr65238-1.c: New test. > * gcc.dg/cpp/pr65238-2.c: New test. > * gcc.dg/cpp/trad/pr65238-3.c: New test. > * gcc.dg/cpp/trad/pr65238-4.c: New test. I do not have the rights to ACK this but FWIW it looks OK to me. Sorry for the delay in reviewing this. Thanks! Cheers, -- Dodji
[PATCH] Fix PR ipa/65465
Hello. Following patch is fix for the PR. Problem is caused if we fill up cgraph_thunk_info with some values (e.g. virtual_value != 0) and further analysis set thunk_p = false. In all situations IPA ICF needs to reset all fields of the struct as it sets thunk_p = true. Tested on x86_64-linux-pc, no new regression. Fixed ICE for the arm cross compiler. Ready for trunk? Thanks, Martin >From 1b0416658cf59348664d44b14518c994075fd9bd Mon Sep 17 00:00:00 2001 From: mliska Date: Thu, 19 Mar 2015 15:36:34 +0100 Subject: [PATCH] Fix for PR ipa/65465. gcc/ChangeLog: 2015-03-19 Martin Liska PR ipa/65465 * cgraphunit.c (cgraph_node::create_wrapper): Correctly reset all fields of cgraph_thunk_info. gcc/testsuite/ChangeLog: 2015-03-19 Jakub Jelinek * g++.dg/ipa/pr65465.C: New test. --- gcc/cgraphunit.c | 3 ++- gcc/testsuite/g++.dg/ipa/pr65465.C | 16 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/ipa/pr65465.C diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index e640907..8b7d056 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2484,8 +2484,9 @@ cgraph_node::create_wrapper (cgraph_node *target) /* Turn alias into thunk and expand it into GIMPLE representation. */ definition = true; + + memset (&thunk, 0, sizeof(cgraph_thunk_info)); thunk.thunk_p = true; - thunk.this_adjusting = false; create_edge (target, NULL, count, CGRAPH_FREQ_BASE); tree arguments = DECL_ARGUMENTS (decl); diff --git a/gcc/testsuite/g++.dg/ipa/pr65465.C b/gcc/testsuite/g++.dg/ipa/pr65465.C new file mode 100644 index 000..004b76e --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/pr65465.C @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +struct A {}; +struct B { virtual A foo () const; }; +struct C { A foo () const; }; +struct D : virtual B { A foo () const {} }; +struct F : D { virtual int bar () const; }; +int F::bar () const { return 0; } +A C::foo () const { return A (); } + +int +main () +{ + return 0; +} -- 2.1.2
Re: [PATCH] Fix PR ipa/65465
On Thu, Mar 19, 2015 at 06:08:03PM +0100, Martin Liška wrote: > >From 1b0416658cf59348664d44b14518c994075fd9bd Mon Sep 17 00:00:00 2001 > From: mliska > Date: Thu, 19 Mar 2015 15:36:34 +0100 > Subject: [PATCH] Fix for PR ipa/65465. > > gcc/ChangeLog: > > 2015-03-19 Martin Liska > > PR ipa/65465 > * cgraphunit.c (cgraph_node::create_wrapper): Correctly reset > all fields of cgraph_thunk_info. > > gcc/testsuite/ChangeLog: > > 2015-03-19 Jakub Jelinek > > * g++.dg/ipa/pr65465.C: New test. > --- > gcc/cgraphunit.c | 3 ++- > gcc/testsuite/g++.dg/ipa/pr65465.C | 16 > 2 files changed, 18 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/g++.dg/ipa/pr65465.C > > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c > index e640907..8b7d056 100644 > --- a/gcc/cgraphunit.c > +++ b/gcc/cgraphunit.c > @@ -2484,8 +2484,9 @@ cgraph_node::create_wrapper (cgraph_node *target) > >/* Turn alias into thunk and expand it into GIMPLE representation. */ >definition = true; > + > + memset (&thunk, 0, sizeof(cgraph_thunk_info)); Please put space after sizeof. >thunk.thunk_p = true; > - thunk.this_adjusting = false; >create_edge (target, NULL, count, CGRAPH_FREQ_BASE); > >tree arguments = DECL_ARGUMENTS (decl); Ok for trunk with that change. > diff --git a/gcc/testsuite/g++.dg/ipa/pr65465.C > b/gcc/testsuite/g++.dg/ipa/pr65465.C > new file mode 100644 > index 000..004b76e > --- /dev/null > +++ b/gcc/testsuite/g++.dg/ipa/pr65465.C > @@ -0,0 +1,16 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +struct A {}; > +struct B { virtual A foo () const; }; > +struct C { A foo () const; }; > +struct D : virtual B { A foo () const {} }; > +struct F : D { virtual int bar () const; }; > +int F::bar () const { return 0; } > +A C::foo () const { return A (); } > + > +int > +main () > +{ > + return 0; > +} Why do you need main for a dg-do compile testcase? And even if you do for some reason, the return 0; in there is implicit in C++. Jakub
Re: [PATCH] Fix PR ipa/65465
On 03/19/2015 06:13 PM, Jakub Jelinek wrote: On Thu, Mar 19, 2015 at 06:08:03PM +0100, Martin Liška wrote: >From 1b0416658cf59348664d44b14518c994075fd9bd Mon Sep 17 00:00:00 2001 From: mliska Date: Thu, 19 Mar 2015 15:36:34 +0100 Subject: [PATCH] Fix for PR ipa/65465. gcc/ChangeLog: 2015-03-19 Martin Liska PR ipa/65465 * cgraphunit.c (cgraph_node::create_wrapper): Correctly reset all fields of cgraph_thunk_info. gcc/testsuite/ChangeLog: 2015-03-19 Jakub Jelinek * g++.dg/ipa/pr65465.C: New test. --- gcc/cgraphunit.c | 3 ++- gcc/testsuite/g++.dg/ipa/pr65465.C | 16 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/ipa/pr65465.C diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index e640907..8b7d056 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2484,8 +2484,9 @@ cgraph_node::create_wrapper (cgraph_node *target) /* Turn alias into thunk and expand it into GIMPLE representation. */ definition = true; + + memset (&thunk, 0, sizeof(cgraph_thunk_info)); Please put space after sizeof. thunk.thunk_p = true; - thunk.this_adjusting = false; create_edge (target, NULL, count, CGRAPH_FREQ_BASE); tree arguments = DECL_ARGUMENTS (decl); Ok for trunk with that change. diff --git a/gcc/testsuite/g++.dg/ipa/pr65465.C b/gcc/testsuite/g++.dg/ipa/pr65465.C new file mode 100644 index 000..004b76e --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/pr65465.C @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +struct A {}; +struct B { virtual A foo () const; }; +struct C { A foo () const; }; +struct D : virtual B { A foo () const {} }; +struct F : D { virtual int bar () const; }; +int F::bar () const { return 0; } +A C::foo () const { return A (); } + +int +main () +{ + return 0; +} Why do you need main for a dg-do compile testcase? And even if you do for some reason, the return 0; in there is implicit in C++. Jakub Thanks for both notes, there's fixed version I'm going to install. Martin >From 065789b7b2e8bf94f1caf2a610c5acc1831c20bc Mon Sep 17 00:00:00 2001 From: mliska Date: Thu, 19 Mar 2015 15:36:34 +0100 Subject: [PATCH] Fix for PR ipa/65465. gcc/ChangeLog: 2015-03-19 Martin Liska PR ipa/65465 * cgraphunit.c (cgraph_node::create_wrapper): Correctly reset all fields of cgraph_thunk_info. gcc/testsuite/ChangeLog: 2015-03-19 Jakub Jelinek * g++.dg/ipa/pr65465.C: New test. --- gcc/cgraphunit.c | 3 ++- gcc/testsuite/g++.dg/ipa/pr65465.C | 10 ++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/ipa/pr65465.C diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index e640907..8ac92e1 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2484,8 +2484,9 @@ cgraph_node::create_wrapper (cgraph_node *target) /* Turn alias into thunk and expand it into GIMPLE representation. */ definition = true; + + memset (&thunk, 0, sizeof (cgraph_thunk_info)); thunk.thunk_p = true; - thunk.this_adjusting = false; create_edge (target, NULL, count, CGRAPH_FREQ_BASE); tree arguments = DECL_ARGUMENTS (decl); diff --git a/gcc/testsuite/g++.dg/ipa/pr65465.C b/gcc/testsuite/g++.dg/ipa/pr65465.C new file mode 100644 index 000..436d88f --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/pr65465.C @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +struct A {}; +struct B { virtual A foo () const; }; +struct C { A foo () const; }; +struct D : virtual B { A foo () const {} }; +struct F : D { virtual int bar () const; }; +int F::bar () const { return 0; } +A C::foo () const { return A (); } -- 2.1.2
[PATCH] Fix PR ipa/65380
Hello. This is fix for the issue as introduced by Honza. I've just finished testing on x86_64-linux-pc and the patch is pre-approved by Honza. Thanks, Martin >From f3c845d74b7a1edbbba9ecefa081feb58700d515 Mon Sep 17 00:00:00 2001 From: mliska Date: Thu, 19 Mar 2015 10:52:44 +0100 Subject: [PATCH] Fix PR ipa/65380. gcc/ChangeLog: 2015-03-19 Jan Hubicka PR ipa/65380 * ipa-icf.c (sem_function::merge): Do not merge DECL_EXTERNAL symbols. (sem_variable::merge): Likewise. --- gcc/ipa-icf.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index f68d23c..360cf17 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -814,6 +814,13 @@ sem_function::merge (sem_item *alias_item) bool original_address_matters = original->address_matters_p (); bool alias_address_matters = alias->address_matters_p (); + if (DECL_EXTERNAL (alias->decl)) +{ + if (dump_file) + fprintf (dump_file, "Not unifying; alias is external.\n\n"); + return false; +} + if (DECL_NO_INLINE_WARNING_P (original->decl) != DECL_NO_INLINE_WARNING_P (alias->decl)) { @@ -1776,6 +1783,13 @@ sem_variable::merge (sem_item *alias_item) return false; } + if (DECL_EXTERNAL (alias_item->decl)) +{ + if (dump_file) + fprintf (dump_file, "Not unifying; alias is external.\n\n"); + return false; +} + sem_variable *alias_var = static_cast (alias_item); varpool_node *original = get_node (); -- 2.1.2
Re: [debug-early] emitting early debug for external variables
On 03/19/2015 02:08 AM, Richard Biener wrote: On Wed, Mar 18, 2015 at 10:28 PM, Jason Merrill wrote: If you move the call to rest_of_decl_compilation we could go through and prune the debug info for unused decls at dwarf2out_finish time, the way we do with unused types. True. Note that the varpool nodes eventually get created once the first use is seen. So I wonder how much garbage we create by unconditionally creating the node in rest_of_decl_compilation (yeah, header files, of course - probably a similar issue for unused function declarations?). I'd prefer to do early_global_decl from rest_of_decl_compilation (and shun the symtab/varpool walk - but that would require the FEs would hand off each global that possibly needs debug info through rest_of_decl_compilation). To prune globals during early(!) dwarf2out_finish you should be able to use the symbol table (not sure if we prune all unused symbols, but surely the list of references should be empty). Richard. Thank you both. I have moved the debug early generation for _symbols_ to rest_of_decl_compilation. I'm not so so brave as to move FUNCTION_DECL's and such. Besides, things are working fine. Let me get through the rest of my gdb regressions. :). I am now running gdb tests for every patch, in an effort to fix the plethora of regressions I have caused over the past few months. The current patch exposes no regressions for guality.exp, or for the gdb testsuite. For that matter, it fixes 4-5 gdb regressions. I will prune the unused DIEs in a subsequent patch; actually much later, when I'm done regression hunting. Let me know if you have any problems with this. I am committing to the branch. Aldy commit b1457fad19257267facddb6ce88c6041a24a5154 Author: Aldy Hernandez Date: Thu Mar 19 10:21:02 2015 -0700 Unconditionally send all global symbols (whether used or not) to early_global_decl. diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 1650c6c..e60acd5 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2430,16 +2430,6 @@ symbol_table::finalize_compilation_unit (void) if (flag_dump_passes) dump_passes (); - /* Generate early debug for global symbols. Any local symbols will - be handled by either handling reachable functions further down - (and by consequence, locally scoped symbols), or by generating - DIEs for types. */ - symtab_node *snode; - FOR_EACH_SYMBOL (snode) -if (TREE_CODE (snode->decl) != FUNCTION_DECL - && !decl_function_context (snode->decl)) - (*debug_hooks->early_global_decl) (snode->decl); - /* Gimplify and lower all functions, compute reachability and remove unreachable nodes. */ analyze_functions (); diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c index 92f4903..76fd70b 100644 --- a/gcc/dwarf2out.c +++ b/gcc/dwarf2out.c @@ -21868,17 +21868,6 @@ dwarf2out_decl (tree decl) break; case VAR_DECL: - /* Ignore this VAR_DECL if it refers to a file-scope extern data object -declaration and if the declaration was never even referenced from -within this entire compilation unit. We suppress these DIEs in -order to save space in the .debug section (by eliminating entries -which are probably useless). Note that we must not suppress -block-local extern declarations (whether used or not) because that -would screw-up the debugger's name lookup mechanism and cause it to -miss things which really ought to be in scope at a given point. */ - if (DECL_EXTERNAL (decl) && !TREE_USED (decl)) - return NULL; - /* For local statics lookup proper context die. */ if (local_function_static (decl)) context_die = lookup_decl_die (DECL_CONTEXT (decl)); @@ -25110,6 +25099,8 @@ dwarf2out_finish (const char *filename) if (flag_eliminate_unused_debug_types) prune_unused_types (); + /* FIXME: Prune DIEs for unused decls. */ + /* Generate separate COMDAT sections for type DIEs. */ if (use_debug_types) { diff --git a/gcc/passes.c b/gcc/passes.c index 3a16768..ce06035 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -293,11 +293,16 @@ rest_of_decl_compilation (tree decl, && TREE_STATIC (decl)) varpool_node::get_create (decl); - /* ?? Theoretically, we should be able to to call - debug_hooks->early_global_decl() here just as we do for - rest_of_type_compilation below. This would require changing how - we're currently calling early_global_decl() in all the - front-ends. Something to look into later. */ + /* Generate early debug for global symbols. Any local symbols will + be handled by either handling reachable functions from + finalize_compilation_unit (and by consequence, locally scoped + symbols), or by rest_of_type_compilation below. */ + if (!flag_wpa + && TREE_CODE (decl) != FUNCTION_DECL + && !decl_function_context (decl) + && !current_function_decl + && !decl_t
Re: C++ PATCH for c++/65398 (valid constexpr rejected)
On Wed, Mar 18, 2015 at 11:08:15AM +0100, Richard Biener wrote: > On Fri, Mar 13, 2015 at 3:40 PM, Marek Polacek wrote: > > We started to reject this (IMHO valid) testcase with r214941 that did away > > with > > try_move_mult_to_index -- meaning that we are no longer able to fold > > *(&s[0] + 1) > > into s[1], while we are able to fold *(s + 1) into s[1]. > > > > I suppose cxx_fold_indirect_ref ought to be able to handle both cases, so I > > added > > some code to that effect, it should handle now at least the simple cases... > > Or should that be handled in the middle end? > > It's "correct" for constexpr folding but not correct to hand s[1] down to > the middle-end IL (both cases). Well, in the particular case with > in-array-bound constant and a non-pointer base it's good enough at > least. I believe cxx_fold_indirect_ref result is not passed through to the middle-end, unless it can be folded into a constant. Though, a question is if we do (or, if we don't and should) reject say constexpr char s[] = "abc"; constexpr int j = 4; constexpr char c = *(&s[j] - 2); because there was out of bound access in there. Jakub
Re: C++ PATCH for c++/65398 (valid constexpr rejected)
On Thu, Mar 19, 2015 at 07:05:36PM +0100, Jakub Jelinek wrote: > On Wed, Mar 18, 2015 at 11:08:15AM +0100, Richard Biener wrote: > > On Fri, Mar 13, 2015 at 3:40 PM, Marek Polacek wrote: > > > We started to reject this (IMHO valid) testcase with r214941 that did > > > away with > > > try_move_mult_to_index -- meaning that we are no longer able to fold > > > *(&s[0] + 1) > > > into s[1], while we are able to fold *(s + 1) into s[1]. > > > > > > I suppose cxx_fold_indirect_ref ought to be able to handle both cases, so > > > I added > > > some code to that effect, it should handle now at least the simple > > > cases... > > > Or should that be handled in the middle end? > > > > It's "correct" for constexpr folding but not correct to hand s[1] down to > > the middle-end IL (both cases). Well, in the particular case with > > in-array-bound constant and a non-pointer base it's good enough at > > least. > > I believe cxx_fold_indirect_ref result is not passed through to the > middle-end, unless it can be folded into a constant. > > Though, a question is if we do (or, if we don't and should) reject say > constexpr char s[] = "abc"; > constexpr int j = 4; > constexpr char c = *(&s[j] - 2); > because there was out of bound access in there. That is rejected even with my patch with: error: overflow in constant expression [-fpermissive] and without the patch: error: ‘*((& s[4]) + 18446744073709551614u)’ is not a constant expression (a valid constexpr can't have UB). Marek
Re: C++ PATCH for c++/65398 (valid constexpr rejected)
On Thu, Mar 19, 2015 at 07:13:47PM +0100, Marek Polacek wrote: > On Thu, Mar 19, 2015 at 07:05:36PM +0100, Jakub Jelinek wrote: > > I believe cxx_fold_indirect_ref result is not passed through to the > > middle-end, unless it can be folded into a constant. > > > > Though, a question is if we do (or, if we don't and should) reject say > > constexpr char s[] = "abc"; > > constexpr int j = 4; > > constexpr char c = *(&s[j] - 2); > > because there was out of bound access in there. > > That is rejected even with my patch with: > error: overflow in constant expression [-fpermissive] > and without the patch: > error: ‘*((& s[4]) + 18446744073709551614u)’ is not a constant expression > (a valid constexpr can't have UB). But s/j = 4/j = 3/ should be valid, don't we reject even that? I mean, isn't the rejection because we fold the - 2 early into sizetype (unsigned) + -2UL? Jakub
Re: C++ PATCH for c++/65398 (valid constexpr rejected)
On Thu, Mar 19, 2015 at 07:17:23PM +0100, Jakub Jelinek wrote: > On Thu, Mar 19, 2015 at 07:13:47PM +0100, Marek Polacek wrote: > > On Thu, Mar 19, 2015 at 07:05:36PM +0100, Jakub Jelinek wrote: > > > I believe cxx_fold_indirect_ref result is not passed through to the > > > middle-end, unless it can be folded into a constant. > > > > > > Though, a question is if we do (or, if we don't and should) reject say > > > constexpr char s[] = "abc"; > > > constexpr int j = 4; > > > constexpr char c = *(&s[j] - 2); > > > because there was out of bound access in there. > > > > That is rejected even with my patch with: > > error: overflow in constant expression [-fpermissive] > > and without the patch: > > error: ‘*((& s[4]) + 18446744073709551614u)’ is not a constant expression > > (a valid constexpr can't have UB). > > But s/j = 4/j = 3/ should be valid, don't we reject even that? > I mean, isn't the rejection because we fold the - 2 early into sizetype > (unsigned) + -2UL? Unfortunately we reject even that (regardless the patch), and yeah, it's because of how POINTER_PLUS_EXPR uses sizetype as a type of the second operand. E.g. constexpr char s[] = "abc"; constexpr int j = 1; constexpr char c = *(&s[j] + 3); is correctly rejected with the patch: error: array subscript out of bound Marek
C++ PATCH for c++/65046 (ABI tags and functions/variables)
This patch makes some significant changes to attribute abi_tag. First, it allows explicit naming of tags on inline namespaces, which previously always had a tag with the same name as the namespace itself; this is still the default if no tag is specified. It also introduces automatic tagging of functions and variables with tagged types where the tags are not already reflected in the mangled name. I feel somewhat uneasy about this change, but I think it's the right answer. -Wabi-tag will also warn about this so that people are aware of it and can tag explicitly if they want to. A smaller change is introducing a macro to check for inline namespaces, which were previously completely indistinguishable without checking for the strong using in the enclosing namespace. I'm also reverting some libstdc++ changes to add ABI tags to things in anonymous namespaces, which have no external ABI so we shouldn't care about their mangled names. Tested x86_64-pc-linux-gnu, applying to trunk. commit 525578ca03ac5d451ca4bd604357e1c90ac6e671 Author: Jason Merrill Date: Tue Mar 10 15:34:49 2015 -0400 PR c++/65046 Automatically propagate ABI tags to variables and functions from their (return) type. * class.c (check_tag): Handle variables and functions. (mark_or_check_attr_tags): Split out from find_abi_tags_r. (mark_or_check_tags): Likewise. (mark_abi_tags): Use it. Rename from mark_type_abi_tags. (check_abi_tags): Add single argument overload for decls. Handle inheriting tags for decls. * mangle.c (write_mangled_name): Call it. (mangle_return_type_p): Split out from write_encoding. (unmangled_name_p): Split out from write_mangled_name. (write_mangled_name): Ignore abi_tag on namespace. * cp-tree.h (NAMESPACE_IS_INLINE): Replace NAMESPACE_ABI_TAG. * parser.c (cp_parser_namespace_definition): Set it. * name-lookup.c (handle_namespace_attrs): Use arguments. Warn about abi_tag attribute on non-inline namespace. * tree.c (check_abi_tag_args): Split out from handle_abi_tag_attribute. (handle_abi_tag_attribute): Allow tags on variables. diff --git a/gcc/cp/class.c b/gcc/cp/class.c index 8612163..0518320 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -1382,44 +1382,53 @@ struct abi_tag_data a tag NAMESPACE_DECL) or a STRING_CST (a tag attribute). */ static void -check_tag (tree tag, tree *tp, abi_tag_data *p) +check_tag (tree tag, tree id, tree *tp, abi_tag_data *p) { - tree id; - - if (TREE_CODE (tag) == STRING_CST) -id = get_identifier (TREE_STRING_POINTER (tag)); - else -{ - id = tag; - tag = NULL_TREE; -} - if (!IDENTIFIER_MARKED (id)) { - if (!tag) - tag = build_string (IDENTIFIER_LENGTH (id) + 1, - IDENTIFIER_POINTER (id)); if (p->tags != error_mark_node) { - /* We're collecting tags from template arguments. */ + /* We're collecting tags from template arguments or from + the type of a variable or function return type. */ p->tags = tree_cons (NULL_TREE, tag, p->tags); - ABI_TAG_IMPLICIT (p->tags) = true; /* Don't inherit this tag multiple times. */ IDENTIFIER_MARKED (id) = true; + + if (TYPE_P (p->t)) + { + /* Tags inherited from type template arguments are only used + to avoid warnings. */ + ABI_TAG_IMPLICIT (p->tags) = true; + return; + } + /* For functions and variables we want to warn, too. */ } /* Otherwise we're diagnosing missing tags. */ + if (TREE_CODE (p->t) == FUNCTION_DECL) + { + if (warning (OPT_Wabi_tag, "%qD inherits the %E ABI tag " + "that %qT (used in its return type) has", + p->t, tag, *tp)) + inform (location_of (*tp), "%qT declared here", *tp); + } + else if (TREE_CODE (p->t) == VAR_DECL) + { + if (warning (OPT_Wabi_tag, "%qD inherits the %E ABI tag " + "that %qT (used in its type) has", p->t, tag, *tp)) + inform (location_of (*tp), "%qT declared here", *tp); + } else if (TYPE_P (p->subob)) { - if (warning (OPT_Wabi_tag, "%qT does not have the %E abi tag " + if (warning (OPT_Wabi_tag, "%qT does not have the %E ABI tag " "that base %qT has", p->t, tag, p->subob)) inform (location_of (p->subob), "%qT declared here", p->subob); } else { - if (warning (OPT_Wabi_tag, "%qT does not have the %E abi tag " + if (warning (OPT_Wabi_tag, "%qT does not have the %E ABI tag " "that %qT (used in the type of %qD) has", p->t, tag, *tp, p->subob)) { @@ -1431,8 +1440,53 @@ check_tag (tree tag, tree *tp, abi_tag_data *p) } } +/* Find all the ABI tags in the attribute list ATTR and either call + check_tag (if TP is non-null) or set IDENTIFIER_MARKED to val. */ + +static void +mark_or_check_attr_tags (tree attr, tree *tp, abi_tag_data *p, bool val) +{ + if (!attr) +return; + for (; (attr = lookup_attribute ("abi_tag", attr)); + attr
[PATCH] Speed-up IPA ICF by enhanced hash values
Hi. Following patch improves performance by adding hash values of references that are not candidates for IPA ICF. Tested on x86_64-linux-pc w/o any new regression observed. Ready for trunk? Thanks, Martin >From 6c04cc4283d08a8aa9829574dd91579a918cb508 Mon Sep 17 00:00:00 2001 From: marxin Date: Sun, 15 Mar 2015 19:21:39 -0500 Subject: [PATCH] IPA ICF: include hash values of references. gcc/ChangeLog: 2015-03-15 Martin Liska * ipa-icf.c (sem_item::update_hash_by_addr_refs): New function. (sem_item::update_hash_by_local_refs): Likewise. (sem_variable::get_hash): Empty line is fixed. (sem_item_optimizer::execute): Include adding of hash references. (sem_item_optimizer::update_hash_by_addr_refs): New function. (sem_item_optimizer::build_hash_based_classes): Use local hash. * ipa-icf.h (sem_item::update_hash_by_addr_refs): New function. (sem_item::update_hash_by_local_refs): Likewise. --- gcc/ipa-icf.c | 84 --- gcc/ipa-icf.h | 18 - 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index f68d23c..b8e3aa4 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -557,6 +557,69 @@ sem_function::equals_wpa (sem_item *item, return true; } +/* Update hash by address sensitive references. */ + +void +sem_item::update_hash_by_addr_refs (hash_map &m_symtab_node_map) +{ + if (is_a (node) && DECL_VIRTUAL_P (node->decl)) +return; + + ipa_ref* ref; + inchash::hash hstate (hash); + for (unsigned i = 0; i < node->num_references (); i++) +{ + ref = node->iterate_reference (i, ref); + if (ref->address_matters_p ()) + hstate.add_ptr (ref->referred->ultimate_alias_target ()); +} + + if (is_a (node)) +{ + for (cgraph_edge *e = dyn_cast (node)->callers; e; + e = e->next_caller) + { + sem_item **result = m_symtab_node_map.get (e->callee); + if (!result) + hstate.add_ptr (e->callee->ultimate_alias_target ()); + } +} + + hash = hstate.end (); +} + +/* Update hash by computed local hash values taken from different + semantic items. */ + +void +sem_item::update_hash_by_local_refs (hash_map &m_symtab_node_map) +{ + inchash::hash state (hash); + for (unsigned j = 0; j < node->num_references (); j++) +{ + ipa_ref *ref; + ref = node->iterate_reference (j, ref); + sem_item **result = m_symtab_node_map.get (ref->referring); + if (result) + state.merge_hash ((*result)->hash); +} + + if (type == FUNC) +{ + for (cgraph_edge *e = dyn_cast (node)->callees; e; + e = e->next_callee) + { + sem_item **result = m_symtab_node_map.get (e->caller); + if (result) + state.merge_hash ((*result)->hash); + } +} + + global_hash = state.end (); +} + /* Returns true if the item equals to ITEM given as argument. */ bool @@ -1742,8 +1805,8 @@ hashval_t sem_variable::get_hash (void) { if (hash) - return hash; + /* All WPA streamed in symbols should have their hashes computed at compile time. At this point, the constructor may not be in memory at all. DECL_INITIAL (decl) would be error_mark_node in that case. */ @@ -2202,6 +2265,8 @@ sem_item_optimizer::execute (void) filter_removed_items (); unregister_hooks (); + build_graph (); + update_hash_by_addr_refs (); build_hash_based_classes (); if (dump_file) @@ -2211,8 +2276,6 @@ sem_item_optimizer::execute (void) for (unsigned int i = 0; i < m_items.length(); i++) m_items[i]->init_wpa (); - build_graph (); - subdivide_classes_by_equality (true); if (dump_file) @@ -2301,6 +2364,19 @@ sem_item_optimizer::add_item_to_class (congruence_class *cls, sem_item *item) item->cls = cls; } +void +sem_item_optimizer::update_hash_by_addr_refs () +{ + for (unsigned i = 0; i < m_items.length (); i++) +m_items[i]->update_hash_by_addr_refs (m_symtab_node_map); + + for (unsigned i = 0; i < m_items.length (); i++) +m_items[i]->update_hash_by_local_refs (m_symtab_node_map); + + for (unsigned i = 0; i < m_items.length (); i++) +m_items[i]->hash = m_items[i]->global_hash; +} + /* Congruence classes are built by hash value. */ void @@ -2310,7 +2386,7 @@ sem_item_optimizer::build_hash_based_classes (void) { sem_item *item = m_items[i]; - congruence_class_group *group = get_group_by_hash (item->get_hash (), + congruence_class_group *group = get_group_by_hash (item->hash, item->type); if (!group->classes.length ()) diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h index 8245b54..ad502a7 100644 --- a/gcc/ipa-icf.h +++ b/gcc/ipa-icf.h @@ -189,6 +189,15 @@ public: /* Dump symbol to FILE. */ virtual void dump_to_file (FILE *file) = 0; + /* Update hash by address sensitive references. */ + void update_hash_by_addr_refs (hash_map &m_symtab_node_map); + + /* Update hash by computed local hash values taken from different + semantic items. */ + void update_has
Re: [debug-early] emitting early debug for external variables
On 03/19/2015 02:03 PM, Aldy Hernandez wrote: I have moved the debug early generation for _symbols_ to rest_of_decl_compilation I think you mean "variables". Functions are also symbols. :) Other than that, makes sense to me. Jason
Re: Fix PR 65177: diamonds are not valid execution threads for jump threading
Richard Biener wrote: > please instead fixup after copy_bbs in duplicate_seme_region. > Thanks for the review. Attached patch that does not modify copy_bbs. Fixes make check in hmmer and make check RUNTESTFLAGS=tree-ssa.exp Full bootstrap and regtest in progress on x86_64-linux. Ok for trunk? >From 8f1516235bce3e1c4f359149dcc546d813ed7817 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Tue, 17 Mar 2015 20:28:19 +0100 Subject: [PATCH] diamonds are not valid execution threads for jump threading PR tree-optimization/65177 * tree-ssa-threadupdate.c (verify_seme): Renamed verify_jump_thread. (bb_in_bbs): New. (duplicate_seme_region): Renamed duplicate_thread_path. Redirect all edges not adjacent on the path to the original code. --- gcc/tree-ssa-threadupdate.c | 93 +++ 1 files changed, 67 insertions(+), 26 deletions(-) diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c index 7a159bb..610e807 100644 --- a/gcc/tree-ssa-threadupdate.c +++ b/gcc/tree-ssa-threadupdate.c @@ -2328,36 +2328,32 @@ bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED) return false; } -/* Verify that the REGION is a Single Entry Multiple Exits region: make sure no - edge other than ENTRY is entering the REGION. */ +/* Verify that the REGION is a valid jump thread. A jump thread is a special + case of SEME Single Entry Multiple Exits region in which all nodes in the + REGION have exactly one incoming edge. The only exception is the first block + that may not have been connected to the rest of the cfg yet. */ DEBUG_FUNCTION void -verify_seme (edge entry, basic_block *region, unsigned n_region) +verify_jump_thread (basic_block *region, unsigned n_region) { - bitmap bbs = BITMAP_ALLOC (NULL); - for (unsigned i = 0; i < n_region; i++) -bitmap_set_bit (bbs, region[i]->index); +gcc_assert (EDGE_COUNT (region[i]->preds) <= 1); +} - for (unsigned i = 0; i < n_region; i++) -{ - edge e; - edge_iterator ei; - basic_block bb = region[i]; +/* Return true when BB is one of the first N items in BBS. */ - /* All predecessors other than ENTRY->src should be in the region. */ - for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); ei_next (&ei)) - if (e != entry) - gcc_assert (bitmap_bit_p (bbs, e->src->index)); -} +static inline bool +bb_in_bbs (basic_block bb, basic_block *bbs, int n) +{ + for (int i = 0; i < n; i++) +if (bb == bbs[i]) + return true; - BITMAP_FREE (bbs); + return false; } -/* Duplicates a Single Entry Multiple Exit REGION (set of N_REGION basic - blocks). The ENTRY edge is redirected to the duplicate of the region. If - REGION is not a Single Entry region, ignore any incoming edges other than - ENTRY: this makes the copied region a Single Entry region. +/* Duplicates a jump-thread path of N_REGION basic blocks. + The ENTRY edge is redirected to the duplicate of the region. Remove the last conditional statement in the last basic block in the REGION, and create a single fallthru edge pointing to the same destination as the @@ -2369,7 +2365,7 @@ verify_seme (edge entry, basic_block *region, unsigned n_region) Returns false if it is unable to copy the region, true otherwise. */ static bool -duplicate_seme_region (edge entry, edge exit, +duplicate_thread_path (edge entry, edge exit, basic_block *region, unsigned n_region, basic_block *region_copy) { @@ -2428,7 +2424,53 @@ duplicate_seme_region (edge entry, edge exit, } copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop, - split_edge_bb_loc (entry), 0); + split_edge_bb_loc (entry), false); + + /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The + following code ensures that all the edges exiting the jump-thread path are + redirected back to the original code: these edges are exceptions + invalidating the property that is propagated by executing all the blocks of + the jump-thread path in order. */ + + for (i = 0; i < n_region; i++) +{ + edge e; + edge_iterator ei; + basic_block bb = region_copy[i]; + + if (single_succ_p (bb)) + { + /* Make sure the successor is the next node in the path. */ + gcc_assert (i + 1 == n_region + || region_copy[i + 1] == single_succ_edge (bb)->dest); + continue; + } + + /* Special case the last block on the path: make sure that it does not + jump back on the copied path. */ + if (i + 1 == n_region) + { + FOR_EACH_EDGE (e, ei, bb->succs) + if (bb_in_bbs (e->dest, region_copy, n_region - 1)) + { + basic_block orig = get_bb_original (e->dest); + if (orig) + redirect_edge_and_branch_force (e, orig); + } + continue; + } + + /* Redirect all other edges jumping to non-adjacent blocks back to the + original code. */ + FOR_EACH_EDGE (e, ei, bb->succs) + if (region_copy[i + 1] != e->dest)
Patch to fix PR63491
The following patch fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63491 The patch was bootstrapped on x86-64, power64, and aarch64. Committed as rev. 221552. 2015-03-19 Vladimir Makarov PR rtl-optimization/63491 * lra-constraints.c (check_and_process_move): Use src instead of sreg. Remove some dead code. 2015-03-19 Vladimir Makarov PR rtl-optimization/63491 * gcc.target/powerpc/pr63491.c: New. Index: lra-constraints.c === --- lra-constraints.c (revision 221494) +++ lra-constraints.c (working copy) @@ -1074,10 +1074,9 @@ static bool check_and_process_move (bool *change_p, bool *sec_mem_p ATTRIBUTE_UNUSED) { int sregno, dregno; - rtx dest, src, dreg, sreg, old_sreg, new_reg, scratch_reg; + rtx dest, src, dreg, sreg, new_reg, scratch_reg; rtx_insn *before; enum reg_class dclass, sclass, secondary_class; - machine_mode sreg_mode; secondary_reload_info sri; lra_assert (curr_insn_set != NULL_RTX); @@ -1101,8 +1100,6 @@ check_and_process_move (bool *change_p, were a right class for the pseudo, secondary_... hooks usually are not define for ALL_REGS. */ return false; - sreg_mode = GET_MODE (sreg); - old_sreg = sreg; if (REG_P (sreg)) sclass = get_reg_class (REGNO (sreg)); if (sclass == ALL_REGS) @@ -1161,9 +1158,9 @@ check_and_process_move (bool *change_p, sri.icode = CODE_FOR_nothing; sri.extra_cost = 0; secondary_class - = (enum reg_class) targetm.secondary_reload (true, sreg, + = (enum reg_class) targetm.secondary_reload (true, src, (reg_class_t) dclass, - sreg_mode, &sri); + GET_MODE (src), &sri); /* Check the target hook consistency. */ lra_assert ((secondary_class == NO_REGS && sri.icode == CODE_FOR_nothing) @@ -1179,14 +1176,12 @@ check_and_process_move (bool *change_p, *change_p = true; new_reg = NULL_RTX; if (secondary_class != NO_REGS) -new_reg = lra_create_new_reg_with_unique_value (sreg_mode, NULL_RTX, +new_reg = lra_create_new_reg_with_unique_value (GET_MODE (src), NULL_RTX, secondary_class, "secondary"); start_sequence (); - if (old_sreg != sreg) -sreg = copy_rtx (sreg); if (sri.icode == CODE_FOR_nothing) -lra_emit_move (new_reg, sreg); +lra_emit_move (new_reg, src); else { enum reg_class scratch_class; @@ -1197,18 +1192,13 @@ check_and_process_move (bool *change_p, (insn_data[sri.icode].operand[2].mode, NULL_RTX, scratch_class, "scratch")); emit_insn (GEN_FCN (sri.icode) (new_reg != NULL_RTX ? new_reg : dest, - sreg, scratch_reg)); + src, scratch_reg)); } before = get_insns (); end_sequence (); lra_process_new_insns (curr_insn, before, NULL, "Inserting the move"); if (new_reg != NULL_RTX) -{ - if (GET_CODE (src) == SUBREG) - SUBREG_REG (src) = new_reg; - else - SET_SRC (curr_insn_set) = new_reg; -} +SET_SRC (curr_insn_set) = new_reg; else { if (lra_dump_file != NULL) Index: testsuite/gcc.target/powerpc/pr63491.c === --- testsuite/gcc.target/powerpc/pr63491.c (revision 0) +++ testsuite/gcc.target/powerpc/pr63491.c (working copy) @@ -0,0 +1,17 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-options "-O1 -m64 -mcpu=power8 -mlra" } */ + +typedef __int128_t __attribute__((__vector_size__(16))) vector_128_t; +typedef unsigned long long scalar_64_t; + +vector_128_t +foo (void) +{ + union { +scalar_64_t i64[2]; +vector_128_t v128; + } u; + u.i64[0] = 1; + u.i64[1] = 2; + return u.v128; +}
Re: [debug-early] emitting early debug for external variables
On 03/19/2015 12:43 PM, Jason Merrill wrote: On 03/19/2015 02:03 PM, Aldy Hernandez wrote: I have moved the debug early generation for _symbols_ to rest_of_decl_compilation I think you mean "variables". Functions are also symbols. :) Oops, yes I did :). I will adjust the comments in my patch. Thanks.
Re: [PATCH] Speed-up IPA ICF by enhanced hash values
> diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c > index f68d23c..b8e3aa4 100644 > --- a/gcc/ipa-icf.c > +++ b/gcc/ipa-icf.c > @@ -557,6 +557,69 @@ sem_function::equals_wpa (sem_item *item, >return true; > } > > +/* Update hash by address sensitive references. */ > + > +void > +sem_item::update_hash_by_addr_refs (hash_map + sem_item *> &m_symtab_node_map) > +{ > + if (is_a (node) && DECL_VIRTUAL_P (node->decl)) Do not return early here, if reference goes to external symbol, we still want to record it as sensitive. ref->address_matters_p () should behave well here. > +return; > + > + ipa_ref* ref; > + inchash::hash hstate (hash); > + for (unsigned i = 0; i < node->num_references (); i++) > +{ > + ref = node->iterate_reference (i, ref); > + if (ref->address_matters_p ()) ref->address_matters_p () || !m_symtab_node_map.get (ref->referred) if refernce goes to external symbol, it behaves like sensitive. You probably want to update topleve comment explaining what is sensitive and local reference and how the hashing is handled. > + hstate.add_ptr (ref->referred->ultimate_alias_target ()); > +} > + > + if (is_a (node)) > +{ > + for (cgraph_edge *e = dyn_cast (node)->callers; e; > +e = e->next_caller) > + { > + sem_item **result = m_symtab_node_map.get (e->callee); > + if (!result) > + hstate.add_ptr (e->callee->ultimate_alias_target ()); > + } > +} > + > + hash = hstate.end (); > +} > + > +/* Update hash by computed local hash values taken from different > + semantic items. */ Please add TODO that stronger SCC based hashing would be desirable here. > @@ -2301,6 +2364,19 @@ sem_item_optimizer::add_item_to_class > (congruence_class *cls, sem_item *item) >item->cls = cls; > } > > +void > +sem_item_optimizer::update_hash_by_addr_refs () Add block comment ande xplain why the addr and local updates can not be performed at once or someone gets an idea to merge the loops. > +{ > + for (unsigned i = 0; i < m_items.length (); i++) > +m_items[i]->update_hash_by_addr_refs (m_symtab_node_map); > + > + for (unsigned i = 0; i < m_items.length (); i++) > +m_items[i]->update_hash_by_local_refs (m_symtab_node_map); > + > + for (unsigned i = 0; i < m_items.length (); i++) > +m_items[i]->hash = m_items[i]->global_hash; > +} > + OK with these changes. Honza
Make messages_members.cc Catalog_info and Catalogs ABI agnostic
On 18/03/2015 19:16, Jonathan Wakely wrote: Preparing this patch reminded me that we currently have two copies of the Catalog_info and Catalogs code in the unnamed namespace in config/locale/gnu/messages_members.cc, one using the old string and one using the new. We should really alter the code to not use std::string so that the catalogs can be shared by both versions of the messages facets. Hello Do you mean like the attached patch ? Or do I need to isolate get_catalogs function in a dedicated source file that won't be built twice ? Tested under Linux x86_64 but uniqueness of catalogs have not been checked. François diff --git a/libstdc++-v3/config/locale/gnu/messages_members.cc b/libstdc++-v3/config/locale/gnu/messages_members.cc index c90499e..c34d846 100644 --- a/libstdc++-v3/config/locale/gnu/messages_members.cc +++ b/libstdc++-v3/config/locale/gnu/messages_members.cc @@ -46,18 +46,21 @@ namespace typedef messages_base::catalog catalog; - struct _GLIBCXX_DEFAULT_ABI_TAG Catalog_info + struct Catalog_info { -Catalog_info(catalog __id, const string& __domain, locale __loc) - : _M_id(__id), _M_domain(__domain), _M_locale(__loc) +Catalog_info(catalog __id, const char* __domain, locale __loc) + : _M_id(__id), _M_domain(strdup(__domain)), _M_locale(__loc) { } +~Catalog_info() +{ free(_M_domain); } + catalog _M_id; -string _M_domain; +char* _M_domain; locale _M_locale; }; - class _GLIBCXX_DEFAULT_ABI_TAG Catalogs + class Catalogs { public: Catalogs() : _M_catalog_counter(0) { } @@ -70,7 +73,7 @@ namespace } catalog -_M_add(const string& __domain, locale __l) +_M_add(const char* __domain, locale __l) { __gnu_cxx::__scoped_lock lock(_M_mutex); @@ -82,6 +85,10 @@ namespace std::auto_ptr info(new Catalog_info(_M_catalog_counter++, __domain, __l)); + + if (!info->_M_domain) + return -1; + _M_infos.push_back(info.get()); return info.release()->_M_id; } @@ -133,7 +140,6 @@ namespace std::vector _M_infos; }; - _GLIBCXX_DEFAULT_ABI_TAG Catalogs& get_catalogs() { @@ -181,7 +187,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bind_textdomain_codeset(__s.c_str(), __nl_langinfo_l(CODESET, __codecvt._M_c_locale_codecvt)); - return get_catalogs()._M_add(__s, __l); + return get_catalogs()._M_add(__s.c_str(), __l); } template<> @@ -203,7 +209,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __dfault; return get_glibc_msg(_M_c_locale_messages, _M_name_messages, - __cat_info->_M_domain.c_str(), + __cat_info->_M_domain, __dfault.c_str()); } @@ -219,7 +225,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bind_textdomain_codeset(__s.c_str(), __nl_langinfo_l(CODESET, __codecvt._M_c_locale_codecvt)); - return get_catalogs()._M_add(__s, __l); + return get_catalogs()._M_add(__s.c_str(), __l); } template<> @@ -261,7 +267,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Make sure string passed to dgettext is \0 terminated. *__dfault_next = '\0'; __translation = get_glibc_msg(_M_c_locale_messages, _M_name_messages, - __cat_info->_M_domain.c_str(), __dfault); + __cat_info->_M_domain, __dfault); // If we end up getting default value back we can simply return original // default value.
Re: [PATCH] Speed-up def_builtin_const (ix86_valid_target_attribute)
On 03/13/2015 05:42 AM, Jan Hubicka wrote: 2015-03-09 Martin Liska * config/i386/i386.c (def_builtin): Collect union of all possible masks. (ix86_add_new_builtins): Do not iterate over all builtins in cases that isa value has no intersection with possible masks and(or) last passed value is equal to the provided. --- gcc/config/i386/i386.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ab8f03a..5f180b6 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -30592,6 +30592,8 @@ struct builtin_isa { static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; +/* Union of all masks that are part of builtin_isa structures. */ +static HOST_WIDE_INT defined_isa_values = 0; /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK of which isa_flags to use in the ix86_builtins_isa array. Stores the @@ -30619,6 +30621,7 @@ def_builtin (HOST_WIDE_INT mask, const char *name, if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) { ix86_builtins_isa[(int) code].isa = mask; + defined_isa_values |= mask; I think you can move this down to set_and_not_build_p set. Please add also comment explaining the caching mehanism. Hi. Explanation of the patch is introduced. mask &= ~OPTION_MASK_ISA_64BIT; if (mask == 0 @@ -30670,6 +30673,14 @@ def_builtin_const (HOST_WIDE_INT mask, const char *name, static void ix86_add_new_builtins (HOST_WIDE_INT isa) { + /* Last cached isa value. */ + static HOST_WIDE_INT last_tested_isa_value = 0; + + if ((isa & defined_isa_values) == 0 || isa == last_tested_isa_value) Heer you need to compare (isa & defined_isa_values) == (isa & last_tested_isa_value) right, because we have isa flags that enable no builtins. I do not understand why, the guard simply ignores last value, which is already processed and 'isa' with any intersection with defined_isa_values. Maybe I miss something? Thanks, Martin Honza >From f40f0fa07d8a9643050c30cd4e29c8c4c8de6cce Mon Sep 17 00:00:00 2001 From: marxin Date: Sun, 8 Mar 2015 19:39:55 -0500 Subject: [PATCH] def_builtin_const: speed-up. gcc/ChangeLog: 2015-03-09 Martin Liska * config/i386/i386.c (def_builtin): Collect union of all possible masks. (ix86_add_new_builtins): Do not iterate over all builtins in cases that isa value has no intersection with possible masks and(or) last passed value is equal to the provided. --- gcc/config/i386/i386.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ab8f03a..134b349 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -30592,6 +30592,8 @@ struct builtin_isa { static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; +/* Union of all masks that are part of builtin_isa structures. */ +static HOST_WIDE_INT defined_isa_values = 0; /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK of which isa_flags to use in the ix86_builtins_isa array. Stores the @@ -30619,6 +30621,7 @@ def_builtin (HOST_WIDE_INT mask, const char *name, if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) { ix86_builtins_isa[(int) code].isa = mask; + defined_isa_values |= mask; mask &= ~OPTION_MASK_ISA_64BIT; if (mask == 0 @@ -30670,6 +30673,17 @@ def_builtin_const (HOST_WIDE_INT mask, const char *name, static void ix86_add_new_builtins (HOST_WIDE_INT isa) { + /* Last cached isa value. */ + static HOST_WIDE_INT last_tested_isa_value = 0; + + /* We iterate through all defined builtins just if the last tested + values is different from ISA and just in case there is any intersection + between ISA value and union of all possible configurations. */ + if ((isa & defined_isa_values) == 0 || isa == last_tested_isa_value) +return; + + last_tested_isa_value = isa; + int i; tree saved_current_target_pragma = current_target_pragma; current_target_pragma = NULL_TREE; -- 2.1.2
[PATCH] Fix fdump-passes
[ was: Re: [PATCH, stage1] Make parloops gate more strict ] On 19-03-15 10:00, Richard Biener wrote: Yeah - it makes the -fdump-passes "hack" more pervasive throughout >>the compiler. >> >>I suppose it should instead build & push a "dummy" sturct function. >> > >Like this? Looks good to me. Added ChangeLog entry, bootstrapped and reg-tested on x86_64. OK for stage1 (or stage4) trunk? Thanks, - Tom Fix fdump-passes 2015-03-19 Tom de Vries * function.c (push_dummy_function): New function. (init_dummy_function_start): Use push_dummy_function. (pop_dummy_function): New function. Factored out of ... (expand_dummy_function_end): ... here. * function.h (push_dummy_function, pop_dummy_function): Declare. * passes.c (pass_manager::dump_passes): Use push_dummy_function and pop_dummy_function. * tree-chkp.c (chkp_gate): Handle cgraph_node::get (cfun->decl) == NULL. --- gcc/function.c | 37 - gcc/function.h | 2 ++ gcc/passes.c| 17 ++--- gcc/tree-chkp.c | 6 -- 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/gcc/function.c b/gcc/function.c index 2c3d142..9ddbad8 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -4862,6 +4862,29 @@ prepare_function_start (void) frame_pointer_needed = 0; } +void +push_dummy_function (bool with_decl) +{ + tree fn_decl, fn_type, fn_result_decl; + + gcc_assert (!in_dummy_function); + in_dummy_function = true; + + if (with_decl) +{ + fn_type = build_function_type_list (void_type_node, NULL_TREE); + fn_decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, NULL_TREE, + fn_type); + fn_result_decl = build_decl (UNKNOWN_LOCATION, RESULT_DECL, + NULL_TREE, void_type_node); + DECL_RESULT (fn_decl) = fn_result_decl; +} + else +fn_decl = NULL_TREE; + + push_struct_function (fn_decl); +} + /* Initialize the rtl expansion mechanism so that we can do simple things like generate sequences. This is used to provide a context during global initialization of some passes. You must call expand_dummy_function_end @@ -4870,9 +4893,7 @@ prepare_function_start (void) void init_dummy_function_start (void) { - gcc_assert (!in_dummy_function); - in_dummy_function = true; - push_struct_function (NULL_TREE); + push_dummy_function (false); prepare_function_start (); } @@ -5144,6 +5165,13 @@ expand_function_start (tree subr) stack_check_probe_note = emit_note (NOTE_INSN_DELETED); } +void +pop_dummy_function (void) +{ + pop_cfun (); + in_dummy_function = false; +} + /* Undo the effects of init_dummy_function_start. */ void expand_dummy_function_end (void) @@ -5159,8 +5187,7 @@ expand_dummy_function_end (void) free_after_parsing (cfun); free_after_compilation (cfun); - pop_cfun (); - in_dummy_function = false; + pop_dummy_function (); } /* Helper for diddle_return_value. */ diff --git a/gcc/function.h b/gcc/function.h index b89c5ae..349f80c 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -899,6 +899,8 @@ extern int get_next_funcdef_no (void); extern int get_last_funcdef_no (void); extern void allocate_struct_function (tree, bool); extern void push_struct_function (tree fndecl); +extern void push_dummy_function (bool); +extern void pop_dummy_function (void); extern void init_dummy_function_start (void); extern void init_function_start (tree); extern void stack_protect_epilogue (void); diff --git a/gcc/passes.c b/gcc/passes.c index 23a90d9..bca8dbb 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -944,32 +944,19 @@ dump_passes (void) void pass_manager::dump_passes () const { - struct cgraph_node *n, *node = NULL; + push_dummy_function (true); create_pass_tab (); - FOR_EACH_FUNCTION (n) -if (DECL_STRUCT_FUNCTION (n->decl)) - { - node = n; - break; - } - - if (!node) -return; - - push_cfun (DECL_STRUCT_FUNCTION (node->decl)); - dump_pass_list (all_lowering_passes, 1); dump_pass_list (all_small_ipa_passes, 1); dump_pass_list (all_regular_ipa_passes, 1); dump_pass_list (all_late_ipa_passes, 1); dump_pass_list (all_passes, 1); - pop_cfun (); + pop_dummy_function (); } - /* Returns the pass with NAME. */ static opt_pass * diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c index d2df4ba..16afadf 100644 --- a/gcc/tree-chkp.c +++ b/gcc/tree-chkp.c @@ -4337,8 +4337,10 @@ chkp_execute (void) static bool chkp_gate (void) { - return cgraph_node::get (cfun->decl)->instrumentation_clone -|| lookup_attribute ("chkp ctor", DECL_ATTRIBUTES (cfun->decl)); + cgraph_node *node = cgraph_node::get (cfun->decl); + return ((node != NULL + && node->instrumentation_clone) + || lookup_attribute ("chkp ctor", DECL_ATTRIBUTES (cfun->decl))); } namespace { -- 1.9.1
Re: [GOOGLE] Fixes for TLS wrapper and init functions in LIPO mode
New patch below. Passes regression tests plus internal application build. 2015-03-19 Teresa Johnson gcc/ Google ref b/19618364. * cp/decl2.c (get_local_tls_init_fn): Assert on aux module. (get_tls_init_fn): Promote non-public init functions if necessary in LIPO mode, record new global at module scope. (get_tls_wrapper_fn): Record new global at module scope. (handle_tls_init): Skip in aux module, setup alias in exported primary module. testsuite/ Google ref b/19618364. * testsuite/g++.dg/tree-prof/lipo/tls.h: New test. * testsuite/g++.dg/tree-prof/lipo/tls2.h: Ditto. * testsuite/g++.dg/tree-prof/lipo/tls2_0.C: Ditto. * testsuite/g++.dg/tree-prof/lipo/tls2_1.C: Ditto. * testsuite/g++.dg/tree-prof/lipo/tls_0.C: Ditto. * testsuite/g++.dg/tree-prof/lipo/tls_1.C: Ditto. Index: cp/decl2.c === --- cp/decl2.c (revision 221425) +++ cp/decl2.c (working copy) @@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "c-family/c-ada-spec.h" #include "asan.h" +#include "gcov-io.h" extern cpp_reader *parse_in; @@ -3024,6 +3025,9 @@ var_needs_tls_wrapper (tree var) static tree get_local_tls_init_fn (void) { + /* In LIPO mode we should not generate local init functions for + the aux modules (see handle_tls_init). */ + gcc_assert (!L_IPO_IS_AUXILIARY_MODULE); tree sname = get_identifier ("__tls_init"); tree fn = IDENTIFIER_GLOBAL_VALUE (sname); if (!fn) @@ -3057,14 +3061,39 @@ get_tls_init_fn (tree var) if (!flag_extern_tls_init && DECL_EXTERNAL (var)) return NULL_TREE; + /* In LIPO mode we should not generate local init functions for + aux modules. The wrapper will reference the variable's init function + that is defined in its own primary module. Otherwise there is + a name conflict between the primary and aux __tls_init functions, + and difficulty ensuring each TLS variable is initialized exactly once. + Therefore, if this is an aux module or an exported primary module, we + need to ensure that the init function is available externally by + promoting it if it is not already public. This is similar to the + handling in promote_static_var_func, but we do it as the init function + is created to avoid the need to detect and properly promote this + artificial decl later on. */ + bool promote = L_IPO_IS_AUXILIARY_MODULE || + (L_IPO_COMP_MODE && PRIMARY_MODULE_EXPORTED); + #ifdef ASM_OUTPUT_DEF /* If the variable is internal, or if we can't generate aliases, - call the local init function directly. */ - if (!TREE_PUBLIC (var)) + call the local init function directly. Don't do this if we + are in LIPO mode an may need to export the init function. Note + that get_local_tls_init_fn will assert if it is called on an aux + module. */ + if (!TREE_PUBLIC (var) && !promote) #endif return get_local_tls_init_fn (); tree sname = mangle_tls_init_fn (var); + if (promote) +{ + const char *name = IDENTIFIER_POINTER (sname); + char *assembler_name = (char*) alloca (strlen (name) + 30); + sprintf (assembler_name, "%s.cmo.%u", name, current_module_id); + sname = get_identifier (assembler_name); +} + tree fn = IDENTIFIER_GLOBAL_VALUE (sname); if (!fn) { @@ -3072,7 +3101,18 @@ get_tls_init_fn (tree var) build_function_type (void_type_node, void_list_node)); SET_DECL_LANGUAGE (fn, lang_c); - TREE_PUBLIC (fn) = TREE_PUBLIC (var); + if (promote) +{ + TREE_PUBLIC (fn) = 1; + DECL_VISIBILITY (fn) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (fn) = 1; +} + else +{ + TREE_PUBLIC (fn) = TREE_PUBLIC (var); + DECL_VISIBILITY (fn) = DECL_VISIBILITY (var); + DECL_VISIBILITY_SPECIFIED (fn) = DECL_VISIBILITY_SPECIFIED (var); +} DECL_ARTIFICIAL (fn) = true; DECL_COMDAT (fn) = DECL_COMDAT (var); DECL_EXTERNAL (fn) = DECL_EXTERNAL (var); @@ -3091,8 +3131,6 @@ get_tls_init_fn (tree var) else DECL_WEAK (fn) = DECL_WEAK (var); } - DECL_VISIBILITY (fn) = DECL_VISIBILITY (var); - DECL_VISIBILITY_SPECIFIED (fn) = DECL_VISIBILITY_SPECIFIED (var); DECL_DLLIMPORT_P (fn) = DECL_DLLIMPORT_P (var); DECL_IGNORED_P (fn) = 1; mark_used (fn); @@ -3100,6 +3138,11 @@ get_tls_init_fn (tree var) DECL_BEFRIENDING_CLASSES (fn) = var; SET_IDENTIFIER_GLOBAL_VALUE (sname, fn); + /* In LIPO mode make sure we record the new global value so that it + is cleared before parsing the next aux module. */ + if (L_IPO_COMP_MODE && !is_parsing_done_p ()) +add_decl_to_current_module_scope (fn, +
Re: [GOOGLE] Fixes for TLS wrapper and init functions in LIPO mode
does generate_tls_wrapper also need to be suppressed for aux module? David On Thu, Mar 19, 2015 at 4:11 PM, Teresa Johnson wrote: > New patch below. Passes regression tests plus internal application build. > > 2015-03-19 Teresa Johnson > > gcc/ > Google ref b/19618364. > * cp/decl2.c (get_local_tls_init_fn): Assert on aux module. > (get_tls_init_fn): Promote non-public init functions if necessary > in LIPO mode, record new global at module scope. > (get_tls_wrapper_fn): Record new global at module scope. > (handle_tls_init): Skip in aux module, setup alias in exported > primary module. > > testsuite/ > Google ref b/19618364. > * testsuite/g++.dg/tree-prof/lipo/tls.h: New test. > * testsuite/g++.dg/tree-prof/lipo/tls2.h: Ditto. > * testsuite/g++.dg/tree-prof/lipo/tls2_0.C: Ditto. > * testsuite/g++.dg/tree-prof/lipo/tls2_1.C: Ditto. > * testsuite/g++.dg/tree-prof/lipo/tls_0.C: Ditto. > * testsuite/g++.dg/tree-prof/lipo/tls_1.C: Ditto. > > Index: cp/decl2.c > === > --- cp/decl2.c (revision 221425) > +++ cp/decl2.c (working copy) > @@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see > #include "langhooks.h" > #include "c-family/c-ada-spec.h" > #include "asan.h" > +#include "gcov-io.h" > > extern cpp_reader *parse_in; > > @@ -3024,6 +3025,9 @@ var_needs_tls_wrapper (tree var) > static tree > get_local_tls_init_fn (void) > { > + /* In LIPO mode we should not generate local init functions for > + the aux modules (see handle_tls_init). */ > + gcc_assert (!L_IPO_IS_AUXILIARY_MODULE); >tree sname = get_identifier ("__tls_init"); >tree fn = IDENTIFIER_GLOBAL_VALUE (sname); >if (!fn) > @@ -3057,14 +3061,39 @@ get_tls_init_fn (tree var) >if (!flag_extern_tls_init && DECL_EXTERNAL (var)) > return NULL_TREE; > > + /* In LIPO mode we should not generate local init functions for > + aux modules. The wrapper will reference the variable's init function > + that is defined in its own primary module. Otherwise there is > + a name conflict between the primary and aux __tls_init functions, > + and difficulty ensuring each TLS variable is initialized exactly once. > + Therefore, if this is an aux module or an exported primary module, we > + need to ensure that the init function is available externally by > + promoting it if it is not already public. This is similar to the > + handling in promote_static_var_func, but we do it as the init function > + is created to avoid the need to detect and properly promote this > + artificial decl later on. */ > + bool promote = L_IPO_IS_AUXILIARY_MODULE || > + (L_IPO_COMP_MODE && PRIMARY_MODULE_EXPORTED); > + > #ifdef ASM_OUTPUT_DEF >/* If the variable is internal, or if we can't generate aliases, > - call the local init function directly. */ > - if (!TREE_PUBLIC (var)) > + call the local init function directly. Don't do this if we > + are in LIPO mode an may need to export the init function. Note > + that get_local_tls_init_fn will assert if it is called on an aux > + module. */ > + if (!TREE_PUBLIC (var) && !promote) > #endif > return get_local_tls_init_fn (); > >tree sname = mangle_tls_init_fn (var); > + if (promote) > +{ > + const char *name = IDENTIFIER_POINTER (sname); > + char *assembler_name = (char*) alloca (strlen (name) + 30); > + sprintf (assembler_name, "%s.cmo.%u", name, current_module_id); > + sname = get_identifier (assembler_name); > +} > + >tree fn = IDENTIFIER_GLOBAL_VALUE (sname); >if (!fn) > { > @@ -3072,7 +3101,18 @@ get_tls_init_fn (tree var) > build_function_type (void_type_node, > void_list_node)); >SET_DECL_LANGUAGE (fn, lang_c); > - TREE_PUBLIC (fn) = TREE_PUBLIC (var); > + if (promote) > +{ > + TREE_PUBLIC (fn) = 1; > + DECL_VISIBILITY (fn) = VISIBILITY_HIDDEN; > + DECL_VISIBILITY_SPECIFIED (fn) = 1; > +} > + else > +{ > + TREE_PUBLIC (fn) = TREE_PUBLIC (var); > + DECL_VISIBILITY (fn) = DECL_VISIBILITY (var); > + DECL_VISIBILITY_SPECIFIED (fn) = DECL_VISIBILITY_SPECIFIED (var); > +} >DECL_ARTIFICIAL (fn) = true; >DECL_COMDAT (fn) = DECL_COMDAT (var); >DECL_EXTERNAL (fn) = DECL_EXTERNAL (var); > @@ -3091,8 +3131,6 @@ get_tls_init_fn (tree var) > else > DECL_WEAK (fn) = DECL_WEAK (var); > } > - DECL_VISIBILITY (fn) = DECL_VISIBILITY (var); > - DECL_VISIBILITY_SPECIFIED (fn) = DECL_VISIBILITY_SPECIFIED (var); >DECL_DLLIMPORT_P (fn) = DECL_DLLIMPORT_P (var); >DECL_IGNORED_P (fn) = 1; >mark_used (fn); > @@ -3100,6 +3138,11 @@
Re: [PATCH] Speed-up def_builtin_const (ix86_valid_target_attribute)
> Hi. > > Explanation of the patch is introduced. thanks > > >> > >>mask &= ~OPTION_MASK_ISA_64BIT; > >>if (mask == 0 > >>@@ -30670,6 +30673,14 @@ def_builtin_const (HOST_WIDE_INT mask, const char > >>*name, > >> static void > >> ix86_add_new_builtins (HOST_WIDE_INT isa) > >> { > >>+ /* Last cached isa value. */ > >>+ static HOST_WIDE_INT last_tested_isa_value = 0; > >>+ > >>+ if ((isa & defined_isa_values) == 0 || isa == last_tested_isa_value) > > > >Heer you need to compare (isa & defined_isa_values) == (isa & > >last_tested_isa_value) right, because we have isa flags that enable no > >builtins. > > I do not understand why, the guard simply ignores last value, which is > already processed and > 'isa' with any intersection with defined_isa_values. Maybe I miss something? I think you can also skip processing in cases ISA changed but the change has empty intersection with defined_isa_values. Honza
Re: [GOOGLE] Fixes for TLS wrapper and init functions in LIPO mode
On Thu, Mar 19, 2015 at 4:45 PM, Xinliang David Li wrote: > does generate_tls_wrapper also need to be suppressed for aux module? No, we generate the wrapper in the aux module and use it to access the TLS variable and invoke it's init function (which is defined in the variable's own module). Presumably we could generate that only in the original module and promote it if necessary, but generating it in the aux module does allow it to be inlined. This is essentially how the TLS accesses are handled for extern TLS variables defined elsewhere (wrapper generated in referring module). Teresa > > David > > On Thu, Mar 19, 2015 at 4:11 PM, Teresa Johnson wrote: >> New patch below. Passes regression tests plus internal application build. >> >> 2015-03-19 Teresa Johnson >> >> gcc/ >> Google ref b/19618364. >> * cp/decl2.c (get_local_tls_init_fn): Assert on aux module. >> (get_tls_init_fn): Promote non-public init functions if necessary >> in LIPO mode, record new global at module scope. >> (get_tls_wrapper_fn): Record new global at module scope. >> (handle_tls_init): Skip in aux module, setup alias in exported >> primary module. >> >> testsuite/ >> Google ref b/19618364. >> * testsuite/g++.dg/tree-prof/lipo/tls.h: New test. >> * testsuite/g++.dg/tree-prof/lipo/tls2.h: Ditto. >> * testsuite/g++.dg/tree-prof/lipo/tls2_0.C: Ditto. >> * testsuite/g++.dg/tree-prof/lipo/tls2_1.C: Ditto. >> * testsuite/g++.dg/tree-prof/lipo/tls_0.C: Ditto. >> * testsuite/g++.dg/tree-prof/lipo/tls_1.C: Ditto. >> >> Index: cp/decl2.c >> === >> --- cp/decl2.c (revision 221425) >> +++ cp/decl2.c (working copy) >> @@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see >> #include "langhooks.h" >> #include "c-family/c-ada-spec.h" >> #include "asan.h" >> +#include "gcov-io.h" >> >> extern cpp_reader *parse_in; >> >> @@ -3024,6 +3025,9 @@ var_needs_tls_wrapper (tree var) >> static tree >> get_local_tls_init_fn (void) >> { >> + /* In LIPO mode we should not generate local init functions for >> + the aux modules (see handle_tls_init). */ >> + gcc_assert (!L_IPO_IS_AUXILIARY_MODULE); >>tree sname = get_identifier ("__tls_init"); >>tree fn = IDENTIFIER_GLOBAL_VALUE (sname); >>if (!fn) >> @@ -3057,14 +3061,39 @@ get_tls_init_fn (tree var) >>if (!flag_extern_tls_init && DECL_EXTERNAL (var)) >> return NULL_TREE; >> >> + /* In LIPO mode we should not generate local init functions for >> + aux modules. The wrapper will reference the variable's init function >> + that is defined in its own primary module. Otherwise there is >> + a name conflict between the primary and aux __tls_init functions, >> + and difficulty ensuring each TLS variable is initialized exactly once. >> + Therefore, if this is an aux module or an exported primary module, we >> + need to ensure that the init function is available externally by >> + promoting it if it is not already public. This is similar to the >> + handling in promote_static_var_func, but we do it as the init function >> + is created to avoid the need to detect and properly promote this >> + artificial decl later on. */ >> + bool promote = L_IPO_IS_AUXILIARY_MODULE || >> + (L_IPO_COMP_MODE && PRIMARY_MODULE_EXPORTED); >> + >> #ifdef ASM_OUTPUT_DEF >>/* If the variable is internal, or if we can't generate aliases, >> - call the local init function directly. */ >> - if (!TREE_PUBLIC (var)) >> + call the local init function directly. Don't do this if we >> + are in LIPO mode an may need to export the init function. Note >> + that get_local_tls_init_fn will assert if it is called on an aux >> + module. */ >> + if (!TREE_PUBLIC (var) && !promote) >> #endif >> return get_local_tls_init_fn (); >> >>tree sname = mangle_tls_init_fn (var); >> + if (promote) >> +{ >> + const char *name = IDENTIFIER_POINTER (sname); >> + char *assembler_name = (char*) alloca (strlen (name) + 30); >> + sprintf (assembler_name, "%s.cmo.%u", name, current_module_id); >> + sname = get_identifier (assembler_name); >> +} >> + >>tree fn = IDENTIFIER_GLOBAL_VALUE (sname); >>if (!fn) >> { >> @@ -3072,7 +3101,18 @@ get_tls_init_fn (tree var) >> build_function_type (void_type_node, >> void_list_node)); >>SET_DECL_LANGUAGE (fn, lang_c); >> - TREE_PUBLIC (fn) = TREE_PUBLIC (var); >> + if (promote) >> +{ >> + TREE_PUBLIC (fn) = 1; >> + DECL_VISIBILITY (fn) = VISIBILITY_HIDDEN; >> + DECL_VISIBILITY_SPECIFIED (fn) = 1; >> +} >> + else >> +{ >> + TREE_PUBLIC (fn) = TREE_PUBLIC (var); >> + DECL_VISIBILITY (fn) = DECL_VISIBILITY
Re: [GOOGLE] Fixes for TLS wrapper and init functions in LIPO mode
ok -- then there is an over assertion in get_local_tls_init_fn. The method can be called for aux module -- the decl created will also be promoted. Also can we rely on late promotion (special case of artificial function __tls_init? This can avoid duplicated logic here. David On Thu, Mar 19, 2015 at 5:51 PM, Teresa Johnson wrote: > On Thu, Mar 19, 2015 at 4:45 PM, Xinliang David Li wrote: >> does generate_tls_wrapper also need to be suppressed for aux module? > > No, we generate the wrapper in the aux module and use it to access the > TLS variable and invoke it's init function (which is defined in the > variable's own module). Presumably we could generate that only in the > original module and promote it if necessary, but generating it in the > aux module does allow it to be inlined. This is essentially how the > TLS accesses are handled for extern TLS variables defined elsewhere > (wrapper generated in referring module). > > Teresa > >> >> David >> >> On Thu, Mar 19, 2015 at 4:11 PM, Teresa Johnson wrote: >>> New patch below. Passes regression tests plus internal application build. >>> >>> 2015-03-19 Teresa Johnson >>> >>> gcc/ >>> Google ref b/19618364. >>> * cp/decl2.c (get_local_tls_init_fn): Assert on aux module. >>> (get_tls_init_fn): Promote non-public init functions if necessary >>> in LIPO mode, record new global at module scope. >>> (get_tls_wrapper_fn): Record new global at module scope. >>> (handle_tls_init): Skip in aux module, setup alias in exported >>> primary module. >>> >>> testsuite/ >>> Google ref b/19618364. >>> * testsuite/g++.dg/tree-prof/lipo/tls.h: New test. >>> * testsuite/g++.dg/tree-prof/lipo/tls2.h: Ditto. >>> * testsuite/g++.dg/tree-prof/lipo/tls2_0.C: Ditto. >>> * testsuite/g++.dg/tree-prof/lipo/tls2_1.C: Ditto. >>> * testsuite/g++.dg/tree-prof/lipo/tls_0.C: Ditto. >>> * testsuite/g++.dg/tree-prof/lipo/tls_1.C: Ditto. >>> >>> Index: cp/decl2.c >>> === >>> --- cp/decl2.c (revision 221425) >>> +++ cp/decl2.c (working copy) >>> @@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see >>> #include "langhooks.h" >>> #include "c-family/c-ada-spec.h" >>> #include "asan.h" >>> +#include "gcov-io.h" >>> >>> extern cpp_reader *parse_in; >>> >>> @@ -3024,6 +3025,9 @@ var_needs_tls_wrapper (tree var) >>> static tree >>> get_local_tls_init_fn (void) >>> { >>> + /* In LIPO mode we should not generate local init functions for >>> + the aux modules (see handle_tls_init). */ >>> + gcc_assert (!L_IPO_IS_AUXILIARY_MODULE); >>>tree sname = get_identifier ("__tls_init"); >>>tree fn = IDENTIFIER_GLOBAL_VALUE (sname); >>>if (!fn) >>> @@ -3057,14 +3061,39 @@ get_tls_init_fn (tree var) >>>if (!flag_extern_tls_init && DECL_EXTERNAL (var)) >>> return NULL_TREE; >>> >>> + /* In LIPO mode we should not generate local init functions for >>> + aux modules. The wrapper will reference the variable's init function >>> + that is defined in its own primary module. Otherwise there is >>> + a name conflict between the primary and aux __tls_init functions, >>> + and difficulty ensuring each TLS variable is initialized exactly once. >>> + Therefore, if this is an aux module or an exported primary module, we >>> + need to ensure that the init function is available externally by >>> + promoting it if it is not already public. This is similar to the >>> + handling in promote_static_var_func, but we do it as the init function >>> + is created to avoid the need to detect and properly promote this >>> + artificial decl later on. */ >>> + bool promote = L_IPO_IS_AUXILIARY_MODULE || >>> + (L_IPO_COMP_MODE && PRIMARY_MODULE_EXPORTED); >>> + >>> #ifdef ASM_OUTPUT_DEF >>>/* If the variable is internal, or if we can't generate aliases, >>> - call the local init function directly. */ >>> - if (!TREE_PUBLIC (var)) >>> + call the local init function directly. Don't do this if we >>> + are in LIPO mode an may need to export the init function. Note >>> + that get_local_tls_init_fn will assert if it is called on an aux >>> + module. */ >>> + if (!TREE_PUBLIC (var) && !promote) >>> #endif >>> return get_local_tls_init_fn (); >>> >>>tree sname = mangle_tls_init_fn (var); >>> + if (promote) >>> +{ >>> + const char *name = IDENTIFIER_POINTER (sname); >>> + char *assembler_name = (char*) alloca (strlen (name) + 30); >>> + sprintf (assembler_name, "%s.cmo.%u", name, current_module_id); >>> + sname = get_identifier (assembler_name); >>> +} >>> + >>>tree fn = IDENTIFIER_GLOBAL_VALUE (sname); >>>if (!fn) >>> { >>> @@ -3072,7 +3101,18 @@ get_tls_init_fn (tree var) >>> build_function_type (void_type_node, >>>
Re: [GOOGLE] Fixes for TLS wrapper and init functions in LIPO mode
On Thu, Mar 19, 2015 at 8:00 PM, Xinliang David Li wrote: > ok -- then there is an over assertion in get_local_tls_init_fn. The > method can be called for aux module -- the decl created will also be > promoted. No, it won't ever be called for an aux module. Both callsites are guarded (handle_tls_init has an early return at the top, and the callsite is guarded in get_tls_iniit_fn). > > Also can we rely on late promotion (special case of artificial > function __tls_init? This can avoid duplicated logic here. We don't need to promote __tls_init, but rather the init functions for each TLS variable (which are each aliased to __tls_init). I thought about both approaches, but promoting it as it was created seemed more straightforward. I can give that approach a try though if you prefer it (special casing DECL_ARTIFICIAL functions that start with _ZTH). Incidentally they are not marked static, so that would also need to be done to get them promoted. Teresa > > David > > On Thu, Mar 19, 2015 at 5:51 PM, Teresa Johnson wrote: >> On Thu, Mar 19, 2015 at 4:45 PM, Xinliang David Li >> wrote: >>> does generate_tls_wrapper also need to be suppressed for aux module? >> >> No, we generate the wrapper in the aux module and use it to access the >> TLS variable and invoke it's init function (which is defined in the >> variable's own module). Presumably we could generate that only in the >> original module and promote it if necessary, but generating it in the >> aux module does allow it to be inlined. This is essentially how the >> TLS accesses are handled for extern TLS variables defined elsewhere >> (wrapper generated in referring module). >> >> Teresa >> >>> >>> David >>> >>> On Thu, Mar 19, 2015 at 4:11 PM, Teresa Johnson >>> wrote: New patch below. Passes regression tests plus internal application build. 2015-03-19 Teresa Johnson gcc/ Google ref b/19618364. * cp/decl2.c (get_local_tls_init_fn): Assert on aux module. (get_tls_init_fn): Promote non-public init functions if necessary in LIPO mode, record new global at module scope. (get_tls_wrapper_fn): Record new global at module scope. (handle_tls_init): Skip in aux module, setup alias in exported primary module. testsuite/ Google ref b/19618364. * testsuite/g++.dg/tree-prof/lipo/tls.h: New test. * testsuite/g++.dg/tree-prof/lipo/tls2.h: Ditto. * testsuite/g++.dg/tree-prof/lipo/tls2_0.C: Ditto. * testsuite/g++.dg/tree-prof/lipo/tls2_1.C: Ditto. * testsuite/g++.dg/tree-prof/lipo/tls_0.C: Ditto. * testsuite/g++.dg/tree-prof/lipo/tls_1.C: Ditto. Index: cp/decl2.c === --- cp/decl2.c (revision 221425) +++ cp/decl2.c (working copy) @@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "c-family/c-ada-spec.h" #include "asan.h" +#include "gcov-io.h" extern cpp_reader *parse_in; @@ -3024,6 +3025,9 @@ var_needs_tls_wrapper (tree var) static tree get_local_tls_init_fn (void) { + /* In LIPO mode we should not generate local init functions for + the aux modules (see handle_tls_init). */ + gcc_assert (!L_IPO_IS_AUXILIARY_MODULE); tree sname = get_identifier ("__tls_init"); tree fn = IDENTIFIER_GLOBAL_VALUE (sname); if (!fn) @@ -3057,14 +3061,39 @@ get_tls_init_fn (tree var) if (!flag_extern_tls_init && DECL_EXTERNAL (var)) return NULL_TREE; + /* In LIPO mode we should not generate local init functions for + aux modules. The wrapper will reference the variable's init function + that is defined in its own primary module. Otherwise there is + a name conflict between the primary and aux __tls_init functions, + and difficulty ensuring each TLS variable is initialized exactly once. + Therefore, if this is an aux module or an exported primary module, we + need to ensure that the init function is available externally by + promoting it if it is not already public. This is similar to the + handling in promote_static_var_func, but we do it as the init function + is created to avoid the need to detect and properly promote this + artificial decl later on. */ + bool promote = L_IPO_IS_AUXILIARY_MODULE || + (L_IPO_COMP_MODE && PRIMARY_MODULE_EXPORTED); + #ifdef ASM_OUTPUT_DEF /* If the variable is internal, or if we can't generate aliases, - call the local init function directly. */ - if (!TREE_PUBLIC (var)) + call the local init function directly. Don't do this if we + are in LIPO mode an may need to export t
Re: [GOOGLE] Fixes for TLS wrapper and init functions in LIPO mode
On Thu, Mar 19, 2015 at 9:57 PM, Teresa Johnson wrote: > On Thu, Mar 19, 2015 at 8:00 PM, Xinliang David Li wrote: >> ok -- then there is an over assertion in get_local_tls_init_fn. The >> method can be called for aux module -- the decl created will also be >> promoted. > > No, it won't ever be called for an aux module. Both callsites are > guarded (handle_tls_init has an early return at the top, and the > callsite is guarded in get_tls_iniit_fn). > >> >> Also can we rely on late promotion (special case of artificial >> function __tls_init? This can avoid duplicated logic here. > > We don't need to promote __tls_init, but rather the init functions for > each TLS variable (which are each aliased to __tls_init). > I thought > about both approaches, but promoting it as it was created seemed more > straightforward. I can give that approach a try though if you prefer > it (special casing DECL_ARTIFICIAL functions that start with _ZTH). > Incidentally they are not marked static, so that would also need to be > done to get them promoted. For public tls symbols in aux module, tls wrappers will reference _ZTHxx init functions which are aliases to __tls_init. If __tls_init is not created for aux modules, what symbol will those _ZTHxx funcitons aliased to? Is it better just let _tls_init function to be created as usual, but let the standard promotion kick in later? The ZTHxx functions can still be marked as alias to the promoted __tls_init? David > > Teresa > >> >> David >> >> On Thu, Mar 19, 2015 at 5:51 PM, Teresa Johnson wrote: >>> On Thu, Mar 19, 2015 at 4:45 PM, Xinliang David Li >>> wrote: does generate_tls_wrapper also need to be suppressed for aux module? >>> >>> No, we generate the wrapper in the aux module and use it to access the >>> TLS variable and invoke it's init function (which is defined in the >>> variable's own module). Presumably we could generate that only in the >>> original module and promote it if necessary, but generating it in the >>> aux module does allow it to be inlined. This is essentially how the >>> TLS accesses are handled for extern TLS variables defined elsewhere >>> (wrapper generated in referring module). >>> >>> Teresa >>> David On Thu, Mar 19, 2015 at 4:11 PM, Teresa Johnson wrote: > New patch below. Passes regression tests plus internal application build. > > 2015-03-19 Teresa Johnson > > gcc/ > Google ref b/19618364. > * cp/decl2.c (get_local_tls_init_fn): Assert on aux module. > (get_tls_init_fn): Promote non-public init functions if necessary > in LIPO mode, record new global at module scope. > (get_tls_wrapper_fn): Record new global at module scope. > (handle_tls_init): Skip in aux module, setup alias in exported > primary module. > > testsuite/ > Google ref b/19618364. > * testsuite/g++.dg/tree-prof/lipo/tls.h: New test. > * testsuite/g++.dg/tree-prof/lipo/tls2.h: Ditto. > * testsuite/g++.dg/tree-prof/lipo/tls2_0.C: Ditto. > * testsuite/g++.dg/tree-prof/lipo/tls2_1.C: Ditto. > * testsuite/g++.dg/tree-prof/lipo/tls_0.C: Ditto. > * testsuite/g++.dg/tree-prof/lipo/tls_1.C: Ditto. > > Index: cp/decl2.c > === > --- cp/decl2.c (revision 221425) > +++ cp/decl2.c (working copy) > @@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see > #include "langhooks.h" > #include "c-family/c-ada-spec.h" > #include "asan.h" > +#include "gcov-io.h" > > extern cpp_reader *parse_in; > > @@ -3024,6 +3025,9 @@ var_needs_tls_wrapper (tree var) > static tree > get_local_tls_init_fn (void) > { > + /* In LIPO mode we should not generate local init functions for > + the aux modules (see handle_tls_init). */ > + gcc_assert (!L_IPO_IS_AUXILIARY_MODULE); >tree sname = get_identifier ("__tls_init"); >tree fn = IDENTIFIER_GLOBAL_VALUE (sname); >if (!fn) > @@ -3057,14 +3061,39 @@ get_tls_init_fn (tree var) >if (!flag_extern_tls_init && DECL_EXTERNAL (var)) > return NULL_TREE; > > + /* In LIPO mode we should not generate local init functions for > + aux modules. The wrapper will reference the variable's init function > + that is defined in its own primary module. Otherwise there is > + a name conflict between the primary and aux __tls_init functions, > + and difficulty ensuring each TLS variable is initialized exactly > once. > + Therefore, if this is an aux module or an exported primary module, > we > + need to ensure that the init function is available externally by > + promoting it if it is not already public. This is similar to the > + handling in promot
Fix handling of CIF_FINAL_ERROR codes in inliner
Hi, this patch fixes accounting error in inliner growth calculation. We have CIF_FINAL_NORMAL and CIF_FINAL_ERROR inline_failed codes. The FINAL_ERROR means that it can not be revisited and inlining will fail. Problem is that CIF_FINAL_ERROR is set at link time and not revisited during LTO merging that may turn CIF_FINAL_ERROR into CIF_FINAL_NORMAL. This patch makes the revisit to happen and also makes can_inline_edge_p to more consistently short circuit when inlining is known to be impossible. I also went across cif-code.def and fixed codes. Bootstrapped/regtested x86_64-linux. This patch fixes regression on vortex benchmark (among others probably). I will commit it after bit of more testing. Honza * ipa-inline.c (can_inline_edge_p): Short circuit if inline_failed already is final. (ipa_inline): Recompute inline_failed codes. * cif-code.def (FUNCTION_NOT_OPTIMIZED, REDEFINED_EXTERN_INLINE, USES_COMDAT_LOCAL, ATTRIBUTE_MISMATCH, UNREACHABLE): Declare as CIF_FINAL_ERROR. Index: ipa-inline.c === --- ipa-inline.c(revision 221523) +++ ipa-inline.c(working copy) @@ -312,6 +312,15 @@ static bool can_inline_edge_p (struct cgraph_edge *e, bool report, bool disregard_limits = false, bool early = false) { + gcc_checking_assert (e->inline_failed); + + if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR) +{ + if (report) +report_inline_failed_reason (e); + return false; +} + bool inlinable = true; enum availability avail; cgraph_node *callee = e->callee->ultimate_alias_target (&avail); @@ -323,9 +332,7 @@ can_inline_edge_p (struct cgraph_edge *e struct function *caller_fun = caller->get_fun (); struct function *callee_fun = callee ? callee->get_fun () : NULL; - gcc_assert (e->inline_failed); - - if (!callee || !callee->definition) + if (!callee->definition) { e->inline_failed = CIF_BODY_NOT_AVAILABLE; inlinable = false; @@ -363,8 +370,7 @@ can_inline_edge_p (struct cgraph_edge *e } /* TM pure functions should not be inlined into non-TM_pure functions. */ - else if (is_tm_pure (callee->decl) - && !is_tm_pure (caller->decl)) + else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl)) { e->inline_failed = CIF_UNSPECIFIED; inlinable = false; @@ -2289,7 +2295,22 @@ ipa_inline (void) nnodes = ipa_reverse_postorder (order); FOR_EACH_FUNCTION (node) -node->aux = 0; +{ + node->aux = 0; + + /* Recompute the default reasons for inlining because they may have +changed during merging. */ + if (in_lto_p) + { + for (cgraph_edge *e = node->callees; e; e = e->next_callee) + { + gcc_assert (e->inline_failed); + initialize_inline_failed (e); + } + for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) + initialize_inline_failed (e); + } +} if (dump_file) fprintf (dump_file, "\nFlattening functions:\n"); Index: cif-code.def === --- cif-code.def(revision 221523) +++ cif-code.def(working copy) @@ -39,7 +39,7 @@ DEFCIFCODE(FUNCTION_NOT_CONSIDERED, CIF_ N_("function not considered for inlining")) /* Caller is compiled with optimizations disabled. */ -DEFCIFCODE(FUNCTION_NOT_OPTIMIZED, CIF_FINAL_NORMAL, +DEFCIFCODE(FUNCTION_NOT_OPTIMIZED, CIF_FINAL_ERROR, N_("caller is not optimized")) /* Inlining failed owing to unavailable function body. */ @@ -47,7 +47,7 @@ DEFCIFCODE(BODY_NOT_AVAILABLE, CIF_FINAL N_("function body not available")) /* Extern inline function that has been redefined. */ -DEFCIFCODE(REDEFINED_EXTERN_INLINE, CIF_FINAL_NORMAL, +DEFCIFCODE(REDEFINED_EXTERN_INLINE, CIF_FINAL_ERROR, N_("redefined extern inline functions are not considered for " "inlining")) @@ -121,13 +121,13 @@ DEFCIFCODE(OPTIMIZATION_MISMATCH, CIF_FI N_("optimization level attribute mismatch")) /* We can't inline because the callee refers to comdat-local symbols. */ -DEFCIFCODE(USES_COMDAT_LOCAL, CIF_FINAL_NORMAL, +DEFCIFCODE(USES_COMDAT_LOCAL, CIF_FINAL_ERROR, N_("callee refers to comdat-local symbols")) /* We can't inline because of mismatched caller/callee attributes. */ -DEFCIFCODE(ATTRIBUTE_MISMATCH, CIF_FINAL_NORMAL, +DEFCIFCODE(ATTRIBUTE_MISMATCH, CIF_FINAL_ERROR, N_("function attribute mismatch")) /* We proved that the call is unreachable. */ -DEFCIFCODE(UNREACHABLE, CIF_FINAL_NORMAL, +DEFCIFCODE(UNREACHABLE, CIF_FINAL_ERROR, N_("unreachable"))
Fix ICE in ipa-devirt
Hi, this patch fixes ICE in where ODR violation merge non-polymorphic and polymorphic type. I will keep the PR open as I would like to improve the ODR violation warning Bootstrapped/regtested x86_64-linux, will commit it tomorrow. Honza PR ipa/65475 * ipa-devirt.c (add_type_duplicate): Prevail polymorphic type over non-polymorphic * g++.dg/lto/pr65475_0.C: New testcase. * g++.dg/lto/pr65475_1.C: New testcase. Index: testsuite/g++.dg/lto/pr65475_0.C === --- testsuite/g++.dg/lto/pr65475_0.C(revision 0) +++ testsuite/g++.dg/lto/pr65475_0.C(revision 0) @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -r -nostdlib" } */ +namespace std { +class ios_base { + struct A {}; + class __attribute((__abi_tag__("cxx11"))) failure : A {}; +} a; +} + Index: testsuite/g++.dg/lto/pr65475_1.C === --- testsuite/g++.dg/lto/pr65475_1.C(revision 0) +++ testsuite/g++.dg/lto/pr65475_1.C(revision 0) @@ -0,0 +1,27 @@ +namespace std { +template class Trans_NS___cxx11_basic_ostringstream; +class ios_base { + class __attribute((__abi_tag__("cxx11"))) failure { +virtual char m_fn2(); + }; +}; +class B : virtual ios_base {}; +template class Trans_NS___cxx11_basic_ostringstream : B { +public: + void m_fn1(); +}; +} + +class A { +public: + A(int) { +std::Trans_NS___cxx11_basic_ostringstream a; +a.m_fn1(); + } +}; +int b; +void fn1() { (A(b)); } +int +main() +{ +} Index: ipa-devirt.c === --- ipa-devirt.c(revision 221528) +++ ipa-devirt.c(working copy) @@ -1412,9 +1412,18 @@ add_type_duplicate (odr_type val, tree t if (!val->types_set) val->types_set = new hash_set; + /* Chose polymorphic type as leader (this happens only in case of ODR + violations. */ + if ((TREE_CODE (type) == RECORD_TYPE && TYPE_BINFO (type) + && polymorphic_type_binfo_p (TYPE_BINFO (type))) + && (TREE_CODE (val->type) != RECORD_TYPE || !TYPE_BINFO (val->type) + || !polymorphic_type_binfo_p (TYPE_BINFO (val->type +{ + prevail = true; + build_bases = true; +} /* Always prefer complete type to be the leader. */ - - if (!COMPLETE_TYPE_P (val->type) && COMPLETE_TYPE_P (type)) + else if (!COMPLETE_TYPE_P (val->type) && COMPLETE_TYPE_P (type)) { prevail = true; build_bases = TYPE_BINFO (type);