On Wed, Apr 17, 2013 at 5:59 PM, Martin Jambor <mjam...@suse.cz> wrote: > Hi, > > this patch implements type-based devirtualization done > intra-procedurally. This is normally done by the front-end except in > cases when opportunities for this transformation are created by > early-inlining. Because we handle this situation at IPA-level > (especially in inlining but also in IPA-CP), this can currently mean > that some code runs much faster when compiled without early-inlining, > e.g. the PR 56718 testcase. > > This patch piggy-backs on PRE OBJ_TYPE_REF folding and when that > fails, it calls into ipa-cp routines that simply use the > infrastructure we have for construction of known-type jump functions > to figure out the type of the object involved. If that is known, the > appropriate method is looked up in the VMT obtained from its BINFO. > > From now on I'll concentrate on tracking of VMT pointers within > objects rather than on type-based techniques in my devirtualization > efforts. Nevertheless, I do believe we want to have this patch in > because (as opposed to VMT tracking) it can also work when > constructors are in a different compilation unit. Even these > situations do not occur that often, the effects can be quite > substantial when we miss an inlining opportunity, the testcase in the > PR 56718 is rather artificial but 2.5 times quicker with the patch. > > A very similar patch has passed bootstrap and testing on x86_64-linux > without any problems, I'm bootstrapping and testing this exact one > now. OK for trunk if it passes?
Ok. Thanks, Richard. > Thanks, > > Martin > > > 2013-03-25 Martin Jambor <mjam...@suse.cz> > > PR tree-optimization/56718 > * ipa-cp.c (ipa_value_from_known_type_jfunc): Moved... > * ipa-prop.c (ipa_binfo_from_known_type_jfunc): ...here, renamed > and made public. adjusted all callers. > (ipa_intraprocedural_devirtualization): New function. > * ipa-prop.h (ipa_binfo_from_known_type_jfunc): Declare. > (ipa_intraprocedural_devirtualization): Likewise. > > * Makefile.in (tree-ssa-pre.o): Add ipa-prop.h to dependencies. > > testsuite/ > * g++.dg/ipa/imm-devirt-1.C: New test. > * g++.dg/ipa/imm-devirt-2.C: Likewise. > > *** /tmp/ITGyHx_Makefile.in 2013-04-16 00:02:39.000000000 +0200 > --- gcc/Makefile.in 2013-04-15 15:02:53.079696533 +0200 > *************** tree-ssa-pre.o : tree-ssa-pre.c $(TREE_F > *** 2369,2375 **** > $(TM_H) coretypes.h $(TREE_PASS_H) $(FLAGS_H) langhooks.h \ > $(CFGLOOP_H) alloc-pool.h $(BASIC_BLOCK_H) $(BITMAP_H) $(HASH_TABLE_H) \ > $(GIMPLE_H) $(TREE_INLINE_H) tree-iterator.h tree-ssa-sccvn.h > $(PARAMS_H) \ > ! $(DBGCNT_H) tree-scalar-evolution.h $(GIMPLE_PRETTY_PRINT_H) domwalk.h > tree-ssa-sccvn.o : tree-ssa-sccvn.c $(TREE_FLOW_H) $(CONFIG_H) \ > $(SYSTEM_H) $(TREE_H) $(DIAGNOSTIC_H) \ > $(TM_H) coretypes.h $(DUMPFILE_H) $(FLAGS_H) $(CFGLOOP_H) \ > --- 2369,2376 ---- > $(TM_H) coretypes.h $(TREE_PASS_H) $(FLAGS_H) langhooks.h \ > $(CFGLOOP_H) alloc-pool.h $(BASIC_BLOCK_H) $(BITMAP_H) $(HASH_TABLE_H) \ > $(GIMPLE_H) $(TREE_INLINE_H) tree-iterator.h tree-ssa-sccvn.h > $(PARAMS_H) \ > ! $(DBGCNT_H) tree-scalar-evolution.h $(GIMPLE_PRETTY_PRINT_H) domwalk.h \ > ! $(IPA_PROP_H) > tree-ssa-sccvn.o : tree-ssa-sccvn.c $(TREE_FLOW_H) $(CONFIG_H) \ > $(SYSTEM_H) $(TREE_H) $(DIAGNOSTIC_H) \ > $(TM_H) coretypes.h $(DUMPFILE_H) $(FLAGS_H) $(CFGLOOP_H) \ > *** /tmp/CO3mFA_ipa-cp.c 2013-04-16 00:02:39.000000000 +0200 > --- gcc/ipa-cp.c 2013-04-15 15:02:53.070696564 +0200 > *************** ipa_get_jf_ancestor_result (struct ipa_j > *** 791,810 **** > return NULL_TREE; > } > > - /* Extract the acual BINFO being described by JFUNC which must be a known > type > - jump function. */ > - > - static tree > - ipa_value_from_known_type_jfunc (struct ipa_jump_func *jfunc) > - { > - tree base_binfo = TYPE_BINFO (ipa_get_jf_known_type_base_type (jfunc)); > - if (!base_binfo) > - return NULL_TREE; > - return get_binfo_at_offset (base_binfo, > - ipa_get_jf_known_type_offset (jfunc), > - ipa_get_jf_known_type_component_type (jfunc)); > - } > - > /* Determine whether JFUNC evaluates to a known value (that is either a > constant or a binfo) and if so, return it. Otherwise return NULL. INFO > describes the caller node so that pass-through jump functions can be > --- 791,796 ---- > *************** ipa_value_from_jfunc (struct ipa_node_pa > *** 816,822 **** > if (jfunc->type == IPA_JF_CONST) > return ipa_get_jf_constant (jfunc); > else if (jfunc->type == IPA_JF_KNOWN_TYPE) > ! return ipa_value_from_known_type_jfunc (jfunc); > else if (jfunc->type == IPA_JF_PASS_THROUGH > || jfunc->type == IPA_JF_ANCESTOR) > { > --- 802,808 ---- > if (jfunc->type == IPA_JF_CONST) > return ipa_get_jf_constant (jfunc); > else if (jfunc->type == IPA_JF_KNOWN_TYPE) > ! return ipa_binfo_from_known_type_jfunc (jfunc); > else if (jfunc->type == IPA_JF_PASS_THROUGH > || jfunc->type == IPA_JF_ANCESTOR) > { > *************** propagate_scalar_accross_jump_function ( > *** 1103,1109 **** > > if (jfunc->type == IPA_JF_KNOWN_TYPE) > { > ! val = ipa_value_from_known_type_jfunc (jfunc); > if (!val) > return set_lattice_contains_variable (dest_lat); > } > --- 1089,1095 ---- > > if (jfunc->type == IPA_JF_KNOWN_TYPE) > { > ! val = ipa_binfo_from_known_type_jfunc (jfunc); > if (!val) > return set_lattice_contains_variable (dest_lat); > } > *** /tmp/I9ETWG_ipa-prop.c 2013-04-16 00:02:39.000000000 +0200 > --- gcc/ipa-prop.c 2013-04-15 23:59:01.435849520 +0200 > *************** ipa_set_ancestor_jf (struct ipa_jump_fun > *** 356,361 **** > --- 356,375 ---- > jfunc->value.ancestor.agg_preserved = agg_preserved; > } > > + /* Extract the acual BINFO being described by JFUNC which must be a known > type > + jump function. */ > + > + tree > + ipa_binfo_from_known_type_jfunc (struct ipa_jump_func *jfunc) > + { > + tree base_binfo = TYPE_BINFO (jfunc->value.known_type.base_type); > + if (!base_binfo) > + return NULL_TREE; > + return get_binfo_at_offset (base_binfo, > + jfunc->value.known_type.offset, > + jfunc->value.known_type.component_type); > + } > + > /* Structure to be passed in between detect_type_change and > check_stmt_for_type_change. */ > > *************** ipa_analyze_node (struct cgraph_node *no > *** 1957,1962 **** > --- 1971,2000 ---- > pop_cfun (); > } > > + /* Given a statement CALL which must be a GIMPLE_CALL calling an > OBJ_TYPE_REF > + attempt a type-based devirtualization. If successful, return the > + target function declaration, otherwise return NULL. */ > + > + tree > + ipa_intraprocedural_devirtualization (gimple call) > + { > + tree binfo, token, fndecl; > + struct ipa_jump_func jfunc; > + tree otr = gimple_call_fn (call); > + > + jfunc.type = IPA_JF_UNKNOWN; > + compute_known_type_jump_func (OBJ_TYPE_REF_OBJECT (otr), &jfunc, > + call); > + if (jfunc.type != IPA_JF_KNOWN_TYPE) > + return NULL_TREE; > + binfo = ipa_binfo_from_known_type_jfunc (&jfunc); > + if (!binfo) > + return NULL_TREE; > + token = OBJ_TYPE_REF_TOKEN (otr); > + fndecl = gimple_get_virt_method_for_binfo (tree_low_cst (token, 1), > + binfo); > + return fndecl; > + } > > /* Update the jump function DST when the call graph edge corresponding to > SRC is > is being inlined, knowing that DST is of type ancestor and src of known > *** /tmp/0gaI2G_ipa-prop.h 2013-04-16 00:02:39.000000000 +0200 > --- gcc/ipa-prop.h 2013-04-15 23:58:19.192973481 +0200 > *************** tree ipa_get_indirect_edge_target (struc > *** 507,512 **** > --- 507,514 ---- > vec<tree> , > vec<ipa_agg_jump_function_p> ); > struct cgraph_edge *ipa_make_edge_direct_to_target (struct cgraph_edge *, > tree); > + tree ipa_binfo_from_known_type_jfunc (struct ipa_jump_func *); > + tree ipa_intraprocedural_devirtualization (gimple); > > /* Functions related to both. */ > void ipa_analyze_node (struct cgraph_node *); > *** /dev/null 2013-03-18 12:22:28.149000077 +0100 > --- gcc/testsuite/g++.dg/ipa/imm-devirt-1.C 2013-04-15 15:02:48.043713609 > +0200 > *************** > *** 0 **** > --- 1,62 ---- > + /* Verify that virtual calls are folded even early inlining puts them into > one > + function with the definition. */ > + /* { dg-do run } */ > + /* { dg-options "-O2 -fdump-tree-fre1-details" } */ > + > + extern "C" void abort (void); > + > + class A > + { > + public: > + int data; > + virtual int foo (int i); > + }; > + > + > + class B : public A > + { > + public: > + __attribute__ ((noinline)) B(); > + virtual int foo (int i); > + }; > + > + int __attribute__ ((noinline)) A::foo (int i) > + { > + return i + 1; > + } > + > + int __attribute__ ((noinline)) B::foo (int i) > + { > + return i + 2; > + } > + > + int __attribute__ ((noinline,noclone)) get_input(void) > + { > + return 1; > + } > + > + __attribute__ ((noinline)) B::B() > + { > + } > + > + static inline int middleman_1 (class A *obj, int i) > + { > + return obj->foo (i); > + } > + > + static inline int middleman_2 (class B *obj, int i) > + { > + return middleman_1 (obj, i); > + } > + > + int main (int argc, char *argv[]) > + { > + class B b; > + > + if (middleman_2 (&b, get_input ()) != 3) > + abort (); > + return 0; > + } > + > + /* { dg-final { scan-tree-dump "Replacing call target with foo" "fre1" } } > */ > + /* { dg-final { cleanup-tree-dump "fre1" } } */ > *** /dev/null 2013-03-18 12:22:28.149000077 +0100 > --- gcc/testsuite/g++.dg/ipa/imm-devirt-2.C 2013-04-15 15:02:48.046713604 > +0200 > *************** > *** 0 **** > --- 1,95 ---- > + /* Verify that virtual calls are folded even early inlining puts them into > one > + function with the definition. */ > + /* { dg-do run } */ > + /* { dg-options "-O2 -fdump-tree-fre1-details" } */ > + > + extern "C" void abort (void); > + > + class Distraction > + { > + public: > + float f; > + double d; > + Distraction () > + { > + f = 8.3; > + d = 10.2; > + } > + virtual float bar (float z); > + }; > + > + class A > + { > + public: > + int data; > + virtual int foo (int i); > + }; > + > + class B : public A > + { > + public: > + int data_2; > + virtual int foo (int i); > + virtual int baz (int i); > + }; > + > + > + class C : public Distraction, public B > + { > + public: > + __attribute__ ((noinline)) C(); > + virtual int foo (int i); > + }; > + > + float __attribute__ ((noinline)) Distraction::bar (float z) > + { > + f += z; > + return f/2; > + } > + > + int __attribute__ ((noinline)) A::foo (int i) > + { > + return i + 1; > + } > + > + int __attribute__ ((noinline)) B::foo (int i) > + { > + return i + 2; > + } > + > + int __attribute__ ((noinline)) B::baz (int i) > + { > + return i * 15; > + } > + > + int __attribute__ ((noinline)) C::foo (int i) > + { > + return i + 3; > + } > + > + int __attribute__ ((noinline,noclone)) get_input(void) > + { > + return 1; > + } > + > + static inline int middleman (class A *obj, int i) > + { > + return obj->foo (i); > + } > + > + __attribute__ ((noinline)) C::C() > + { > + } > + > + int main (int argc, char *argv[]) > + { > + class C c; > + > + if (middleman (&c, get_input ()) != 4) > + abort (); > + > + return 0; > + } > + > + /* { dg-final { scan-tree-dump "Replacing call target" "fre1" } } */ > + /* { dg-final { cleanup-tree-dump "fre1" } } */ > *** /tmp/oVoQsS_tree-ssa-pre.c 2013-04-16 00:02:39.000000000 +0200 > --- gcc/tree-ssa-pre.c 2013-04-15 23:57:57.442037275 +0200 > *************** along with GCC; see the file COPYING3. > *** 43,48 **** > --- 43,49 ---- > #include "params.h" > #include "dbgcnt.h" > #include "domwalk.h" > + #include "ipa-prop.h" > > /* TODO: > > *************** eliminate_bb (dom_walk_data *, basic_blo > *** 4326,4332 **** > fn = VN_INFO (orig_fn)->valnum; > else if (TREE_CODE (orig_fn) == OBJ_TYPE_REF > && TREE_CODE (OBJ_TYPE_REF_EXPR (orig_fn)) == SSA_NAME) > ! fn = VN_INFO (OBJ_TYPE_REF_EXPR (orig_fn))->valnum; > else > continue; > if (gimple_call_addr_fndecl (fn) != NULL_TREE > --- 4327,4341 ---- > fn = VN_INFO (orig_fn)->valnum; > else if (TREE_CODE (orig_fn) == OBJ_TYPE_REF > && TREE_CODE (OBJ_TYPE_REF_EXPR (orig_fn)) == SSA_NAME) > ! { > ! fn = VN_INFO (OBJ_TYPE_REF_EXPR (orig_fn))->valnum; > ! if (!gimple_call_addr_fndecl (fn)) > ! { > ! fn = ipa_intraprocedural_devirtualization (stmt); > ! if (fn) > ! fn = build_fold_addr_expr (fn); > ! } > ! } > else > continue; > if (gimple_call_addr_fndecl (fn) != NULL_TREE