The following patch fixes http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56999
The problem was in complicated interactions of coalescing and doing/undoing inheritance and assignment sub-passes through several iterations. One solution would be in modifying coalescing in order to take future actions in undoing inheritance. Another solution is to move coalesce pass after doing/undoing inheritance sub-passes. The first solution complicates code and make passes more dependable although potentially decreasing # passes of creating live ranges. The second solution makes coalescing pass simpler and less dependable. After some experiments I've chosen the 2nd solution as it makes code more maintainable and less error-prone. On my tests I found that in 32-bit mode it runs 10% more live-range passes (that is about 1/5th of LRA run time), in 64-bit mode the # passes are the same. I check cpu and real time on 500K lines fortran code and did not find any visible increase in compilation time.
The patch was successfully bootstrapped on x86-64. Committed to trunk as rev. 198092. 2013-04-18 Vladimir Makarov <vmaka...@redhat.com> PR rtl-optimization/56992 * lra-coalesce.c (coalescable_pseudo_p): Remove 2nd parameter and related code. (lra_coalesce): Remove split_origin_bitmap and related code. * lra.c (lra): Coalesce after undoing inheritance. Recreate live ranges if necessary. 2013-04-18 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/56992 * g++.dg/opt/pr56999.C: New test.
Index: lra-coalesce.c =================================================================== --- lra-coalesce.c (revision 198077) +++ lra-coalesce.c (working copy) @@ -201,24 +201,14 @@ update_live_info (bitmap lr_bitmap) } } -/* Return true if pseudo REGNO can be potentially coalesced. Use - SPLIT_PSEUDO_BITMAP to find pseudos whose live ranges were - split. */ +/* Return true if pseudo REGNO can be potentially coalesced. */ static bool -coalescable_pseudo_p (int regno, bitmap split_origin_bitmap) +coalescable_pseudo_p (int regno) { lra_assert (regno >= FIRST_PSEUDO_REGISTER); - /* Don't coalesce inheritance pseudos because spilled inheritance - pseudos will be removed in subsequent 'undo inheritance' - pass. */ - return (lra_reg_info[regno].restore_regno < 0 - /* We undo splits for spilled pseudos whose live ranges were - split. So don't coalesce them, it is not necessary and - the undo transformations would be wrong. */ - && ! bitmap_bit_p (split_origin_bitmap, regno) - /* We don't want to coalesce regnos with equivalences, at + return (/* We don't want to coalesce regnos with equivalences, at least without updating this info. */ - && ira_reg_equiv[regno].constant == NULL_RTX + ira_reg_equiv[regno].constant == NULL_RTX && ira_reg_equiv[regno].memory == NULL_RTX && ira_reg_equiv[regno].invariant == NULL_RTX); } @@ -230,12 +220,10 @@ lra_coalesce (void) { basic_block bb; rtx mv, set, insn, next, *sorted_moves; - int i, mv_num, sregno, dregno, restore_regno; - unsigned int regno; + int i, mv_num, sregno, dregno; int coalesced_moves; int max_regno = max_reg_num (); - bitmap_head involved_insns_bitmap, split_origin_bitmap; - bitmap_iterator bi; + bitmap_head involved_insns_bitmap; timevar_push (TV_LRA_COALESCE); @@ -249,11 +237,6 @@ lra_coalesce (void) first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i; sorted_moves = XNEWVEC (rtx, get_max_uid ()); mv_num = 0; - /* Collect pseudos whose live ranges were split. */ - bitmap_initialize (&split_origin_bitmap, ®_obstack); - EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, regno, bi) - if ((restore_regno = lra_reg_info[regno].restore_regno) >= 0) - bitmap_set_bit (&split_origin_bitmap, restore_regno); /* Collect moves. */ coalesced_moves = 0; FOR_EACH_BB (bb) @@ -265,15 +248,13 @@ lra_coalesce (void) && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER && mem_move_p (sregno, dregno) - && coalescable_pseudo_p (sregno, &split_origin_bitmap) - && coalescable_pseudo_p (dregno, &split_origin_bitmap) + && coalescable_pseudo_p (sregno) && coalescable_pseudo_p (dregno) && ! side_effects_p (set) && !(lra_intersected_live_ranges_p (lra_reg_info[sregno].live_ranges, lra_reg_info[dregno].live_ranges))) sorted_moves[mv_num++] = insn; } - bitmap_clear (&split_origin_bitmap); qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func); /* Coalesced copies, most frequently executed first. */ bitmap_initialize (&coalesced_pseudos_bitmap, ®_obstack); Index: lra.c =================================================================== --- lra.c (revision 198077) +++ lra.c (working copy) @@ -2295,11 +2295,20 @@ lra (FILE *f) lra_assign (); else { - /* Do coalescing only for regular algorithms. */ - if (! lra_assign () && lra_coalesce ()) - live_p = false; + bool spill_p = !lra_assign (); + if (lra_undo_inheritance ()) live_p = false; + if (spill_p) + { + if (! live_p) + { + lra_create_live_ranges (true); + live_p = true; + } + if (lra_coalesce ()) + live_p = false; + } if (! live_p) lra_clear_live_ranges (); } Index: testsuite/g++.dg/opt/pr56999.C =================================================================== --- testsuite/g++.dg/opt/pr56999.C (revision 0) +++ testsuite/g++.dg/opt/pr56999.C (working copy) @@ -0,0 +1,188 @@ +// PR rtl-optimization/56999 +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-fpic" { target fpic } } +// { dg-additional-options "-march=i686 -mtune=atom" { target ia32 } } +// { dg-require-visibility "" } + +extern "C" void abort (void); +extern "C" void exit (int); +volatile bool do_exit = true; +struct JSScript; +struct JITScript { int i; }; +#pragma GCC visibility push(hidden) +typedef struct JSCompartment JSCompartment; +typedef struct JSContext JSContext; +namespace js +{ + struct ContextFriendFields + { + JSCompartment *compartment; + }; + struct TempAllocPolicy + { + }; + template <class T> + struct Vector + { + T *mBegin; + T *begin () { return mBegin; } + T & operator[] (unsigned i) { return begin ()[i]; } + template <class U> + __attribute__((noinline, noclone)) + bool append (U) { asm volatile ("" : : : "memory"); if (do_exit) abort (); return false; } + }; + namespace types + { + struct TypeCompartment; + } + namespace mjit + { + } + namespace ion + { + struct IonScript; + } + namespace types + { + struct CompilerOutput + { + enum Kind { MethodJIT, ParallelIon }; + JSScript *script; + unsigned kindInt : 2; + bool constructing : 1; + bool barriers : 1; + bool pendingRecompilation : 1; + Kind kind () const { return static_cast <Kind> (kindInt); } + bool isValid () const; + }; + struct RecompileInfo + { + unsigned outputIndex; + CompilerOutput *compilerOutput (TypeCompartment & types) const; + CompilerOutput *compilerOutput (JSContext *cx) const; + }; + struct TypeCompartment + { + Vector <CompilerOutput> *constrainedOutputs; + Vector <RecompileInfo> *pendingRecompiles; + void addPendingRecompile (JSContext *cx, const RecompileInfo & info); + }; + } +} +struct JSScript +{ + struct JITScriptHandle + { + static volatile JITScript *UNJITTABLE __attribute__((visibility ("default"))); + JITScript *value; + bool isValid () { return value != UNJITTABLE; } + JITScript *getValid () { return value; } + }; + struct JITScriptSet + { + JITScriptHandle jitHandleNormal, jitHandleNormalBarriered; + JITScriptHandle jitHandleCtor, jitHandleCtorBarriered; + JITScriptHandle jitNull1, jitNull2; + }; + JITScriptSet *mJITInfo; + void *ion; + JITScriptHandle *jitHandle (bool constructing, bool barriers) + { + return constructing ? (barriers ? &mJITInfo->jitHandleCtorBarriered + : &mJITInfo->jitHandleCtor) + : (barriers ? &mJITInfo->jitHandleNormalBarriered + : &mJITInfo->jitHandleNormal); + } + JITScript *getJIT (bool constructing, bool barriers) + { + JITScriptHandle *jith = jitHandle (constructing, barriers); + return jith->isValid () ? jith->getValid () : __null; + } +}; +struct JSContext : js::ContextFriendFields +{ +}; +namespace js +{ + __attribute__((noinline, noclone)) + void CancelOffThreadIonCompile (JSCompartment *, JSScript *) + { + if (do_exit) + exit (0); + } +} +struct JSCompartment +{ + js::types::TypeCompartment types; +}; +namespace js +{ + namespace types + { + inline bool CompilerOutput::isValid () const + { + if (!script) + return false; + switch (kind ()) + { + case MethodJIT: + { + JITScript *jit = script->getJIT (constructing, barriers); + if (!jit) + return false; + } + case ParallelIon: + return true; + } + return false; + } + inline CompilerOutput *RecompileInfo::compilerOutput (TypeCompartment & types) const + { + return &(*types.constrainedOutputs)[outputIndex]; + } + inline CompilerOutput *RecompileInfo::compilerOutput (JSContext *cx) const + { + return compilerOutput (cx->compartment->types); + } + } +} +using namespace js::types; +__attribute__((noinline, noclone)) void +TypeCompartment::addPendingRecompile (JSContext *cx, const RecompileInfo & info) +{ + CompilerOutput *co = info.compilerOutput (cx); + if (co->pendingRecompilation) + if (co->isValid ()) + CancelOffThreadIonCompile (cx->compartment, co->script); + if (co->isValid ()) + pendingRecompiles->append (info); +} +volatile JITScript *JSScript::JITScriptHandle::UNJITTABLE; +int +main () +{ + JSContext cx; + JSCompartment com; + RecompileInfo info; + cx.compartment = &com; + info.outputIndex = 0; + js::Vector<CompilerOutput> v; + JITScript js; + JSScript::JITScriptSet set; + __builtin_memset (&set, 0, sizeof set); + set.jitHandleCtor.value = &js; + JSScript s; + s.mJITInfo = &set; + CompilerOutput co; + co.kindInt = 0; + co.constructing = true; + co.barriers = false; + co.pendingRecompilation = true; + co.script = &s; + v.mBegin = &co; + com.types.constrainedOutputs = &v; + com.types.pendingRecompiles = __null; + com.types.addPendingRecompile (&cx, info); + abort (); +}