> Hello. > > Following patch tries to resolve following 2 issues: > > a) When one takes address of a function that uses target_clones attribute, > default implementation is always returned. > > b) Using dlsym("foo") should work and thus the resolver function should > use the default name. Because of that, default implementation must be > renamed. > > Unfortunately, we currently do not support redirection of ipa_refs, thus > walk_tree is needed to resolve that. Hopefully there should not be any > different IPA_REF that needs to be handled.
The cgraph interface for redirection is meant mostly for full IPA passes that can not touch bodies directly, in this case I think it is fine to walk all references. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? > Martin > >From 198c8464978c21cd68d4743de5648ecfefd2e09c Mon Sep 17 00:00:00 2001 > From: marxin <mli...@suse.cz> > Date: Wed, 17 May 2017 15:56:22 +0200 > Subject: [PATCH] Fix multi-versioning issues (PR ipa/80732). > > gcc/ChangeLog: > > 2017-05-19 Martin Liska <mli...@suse.cz> > > PR ipa/80732 > * attribs.c (make_dispatcher_decl): Do not append '.ifunc' > to dispatcher function name. > * multiple_target.c (replace_function_decl): New function. > (create_dispatcher_calls): Redirect both edges and references. > > gcc/testsuite/ChangeLog: > > 2017-05-19 Martin Liska <mli...@suse.cz> > > PR ipa/80732 > * gcc.target/i386/mvc5.c: Scan indirect_function. > * gcc.target/i386/mvc7.c: Likewise. > * gcc.target/i386/pr80732.c: New test. > --- > gcc/attribs.c | 6 +- > gcc/multiple_target.c | 105 > ++++++++++++++++++++++---------- > gcc/testsuite/gcc.target/i386/mvc5.c | 2 +- > gcc/testsuite/gcc.target/i386/mvc7.c | 2 +- > gcc/testsuite/gcc.target/i386/pr80732.c | 85 ++++++++++++++++++++++++++ > 5 files changed, 161 insertions(+), 39 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr80732.c > > diff --git a/gcc/attribs.c b/gcc/attribs.c > index 4ba0eab8899..5eb19e82795 100644 > --- a/gcc/attribs.c > +++ b/gcc/attribs.c > @@ -888,12 +888,8 @@ make_dispatcher_decl (const tree decl) > tree func_decl; > char *func_name; > tree fn_type, func_type; > - bool is_uniq = false; > > - if (TREE_PUBLIC (decl) == 0) > - is_uniq = true; > - > - func_name = make_unique_name (decl, "ifunc", is_uniq); > + func_name = xstrdup (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); > > fn_type = TREE_TYPE (decl); > func_type = build_function_type (TREE_TYPE (fn_type), > diff --git a/gcc/multiple_target.c b/gcc/multiple_target.c > index 2ee6a9591ba..fba2636ba16 100644 > --- a/gcc/multiple_target.c > +++ b/gcc/multiple_target.c > @@ -34,6 +34,27 @@ along with GCC; see the file COPYING3. If not see > #include "target.h" > #include "attribs.h" > #include "pretty-print.h" > +#include "gimple-iterator.h" > +#include "gimple-walk.h" > + > +/* Walker callback that replaces all FUNCTION_DECL of a function that's > + going to be versioned. */ > + > +static tree > +replace_function_decl (tree *op, int *walk_subtrees, void *data) > +{ > + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; > + cgraph_function_version_info *info = (cgraph_function_version_info > *)wi->info; > + > + if (TREE_CODE (*op) == FUNCTION_DECL > + && info->this_node->decl == *op) > + { > + *op = info->dispatcher_resolver; > + *walk_subtrees = 0; > + } > + > + return NULL; > +} > > /* If the call in NODE has multiple target attribute with multiple fields, > replace it with dispatcher call and create dispatcher (once). */ > @@ -41,51 +62,48 @@ along with GCC; see the file COPYING3. If not see > static void > create_dispatcher_calls (struct cgraph_node *node) > { > - cgraph_edge *e; > - cgraph_edge *e_next = NULL; > + ipa_ref *ref; > + > + if (!DECL_FUNCTION_VERSIONED (node->decl)) > + return; > + > + auto_vec<cgraph_edge *> edges_to_redirect; > + auto_vec<ipa_ref *> references_to_redirect; > + > + for (unsigned i = 0; node->iterate_referring (i, ref); i++) > + references_to_redirect.safe_push (ref); > > /* We need to remember NEXT_CALLER as it could be modified in the loop. */ > - for (e = node->callers; e ;e = (e == NULL) ? e_next : e->next_caller) > - { > - tree resolver_decl; > - tree idecl; > - tree decl; > - gimple *call = e->call_stmt; > - struct cgraph_node *inode; > - > - /* Checking if call of function is call of versioned function. > - Versioned function are not inlined, so there is no need to > - check for inline. */ > - if (!call > - || !(decl = gimple_call_fndecl (call)) > - || !DECL_FUNCTION_VERSIONED (decl)) > - continue; > + for (cgraph_edge *e = node->callers; e ; e = e->next_caller) > + edges_to_redirect.safe_push (e); > > + if (!edges_to_redirect.is_empty () || !references_to_redirect.is_empty ()) > + { > if (!targetm.has_ifunc_p ()) > { > - error_at (gimple_location (call), > + error_at (DECL_SOURCE_LOCATION (node->decl), > "the call requires ifunc, which is not" > " supported by this target"); > - break; > + return; > } > else if (!targetm.get_function_versions_dispatcher) > { > - error_at (gimple_location (call), > + error_at (DECL_SOURCE_LOCATION (node->decl), > "target does not support function version dispatcher"); > - break; > + return; > } > > - e_next = e->next_caller; > - idecl = targetm.get_function_versions_dispatcher (decl); > + tree idecl = targetm.get_function_versions_dispatcher (node->decl); > if (!idecl) > { > - error_at (gimple_location (call), > + error_at (DECL_SOURCE_LOCATION (node->decl), > "default target_clones attribute was not set"); > - break; > + return; > } > - inode = cgraph_node::get (idecl); > + > + cgraph_node *inode = cgraph_node::get (idecl); > gcc_assert (inode); > - resolver_decl = targetm.generate_version_dispatcher_body (inode); > + tree resolver_decl = targetm.generate_version_dispatcher_body (inode); > > /* Update aliases. */ > inode->alias = true; > @@ -93,12 +111,35 @@ create_dispatcher_calls (struct cgraph_node *node) > if (!inode->analyzed) > inode->resolve_alias (cgraph_node::get (resolver_decl)); > > - e->redirect_callee (inode); > - e->redirect_call_stmt_to_callee (); > - /* Since REDIRECT_CALLEE modifies NEXT_CALLER field we move to > - previously set NEXT_CALLER. */ > - e = NULL; > + /* Redirect edges. */ > + unsigned i; > + cgraph_edge *e; > + FOR_EACH_VEC_ELT (edges_to_redirect, i, e) > + { > + e->redirect_callee (inode); > + e->redirect_call_stmt_to_callee (); > + } > + > + /* Redirect references. */ > + FOR_EACH_VEC_ELT (references_to_redirect, i, ref) > + { > + if (ref->use == IPA_REF_ADDR) > + { > + struct walk_stmt_info wi; > + memset (&wi, 0, sizeof (wi)); > + wi.info = (void *)node->function_version (); > + gimple_stmt_iterator it = gsi_for_stmt (ref->stmt); > + if (ref->referring->decl != resolver_decl) > + walk_gimple_stmt (&it, NULL, replace_function_decl, &wi); > + } > + else > + gcc_unreachable (); Don't you need to handle static initializers here as well? OK with this change or explanation why you don't :) Thanks, Honza > + } > } > + > + symtab->change_decl_assembler_name (node->decl, > + clone_function_name (node->decl, > + "default")); > } > > /* Return length of attribute names string, > diff --git a/gcc/testsuite/gcc.target/i386/mvc5.c > b/gcc/testsuite/gcc.target/i386/mvc5.c > index 8fea04c792b..677f79f3fd0 100644 > --- a/gcc/testsuite/gcc.target/i386/mvc5.c > +++ b/gcc/testsuite/gcc.target/i386/mvc5.c > @@ -1,7 +1,7 @@ > /* { dg-do compile } */ > /* { dg-require-ifunc "" } */ > /* { dg-options "-fno-inline" } */ > -/* { dg-final { scan-assembler-times "foo.ifunc" 6 } } */ > +/* { dg-final { scan-assembler "foo,foo.resolver" } } */ > > __attribute__((target_clones("default","avx","avx2"))) > int > diff --git a/gcc/testsuite/gcc.target/i386/mvc7.c > b/gcc/testsuite/gcc.target/i386/mvc7.c > index 9a9d7a3da30..a3697ba9b75 100644 > --- a/gcc/testsuite/gcc.target/i386/mvc7.c > +++ b/gcc/testsuite/gcc.target/i386/mvc7.c > @@ -3,7 +3,7 @@ > /* { dg-final { scan-assembler "foo.resolver" } } */ > /* { dg-final { scan-assembler "avx" } } */ > /* { dg-final { scan-assembler "slm" } } */ > -/* { dg-final { scan-assembler-times "foo.ifunc" 4 } } */ > +/* { dg-final { scan-assembler "foo,foo.resolver" } } */ > > __attribute__((target_clones("avx","default","arch=slm","arch=core-avx2"))) > int foo (); > diff --git a/gcc/testsuite/gcc.target/i386/pr80732.c > b/gcc/testsuite/gcc.target/i386/pr80732.c > new file mode 100644 > index 00000000000..fb168260eaa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr80732.c > @@ -0,0 +1,85 @@ > +/* PR ipa/80732 */ > +/* { dg-do run } */ > +/* { dg-options "-ldl -fPIC -rdynamic -O3 -g" } */ > +/* { dg-require-ifunc "" } */ > +/* { dg-require-effective-target fma4 } */ > + > + > +#include <dlfcn.h> > + > +__attribute__((target_clones("default","fma"),noinline,optimize("fast-math"))) > +double f1(double a, double b, double c) > +{ > + return a * b + c; > +} > + > +double k1(double a, double b, double c, void **p) > +{ > + *p = f1; > + return f1(a, b, c); > +} > + > +__attribute__((target("fma"),optimize("fast-math"))) > +static double f2_fma(double a, double b, double c) > +{ > + return a * b + c; > +} > + > +__attribute__((optimize("fast-math"))) > +static double f2_default(double a, double b, double c) > +{ > + return a * b + c; > +} > + > +static void *f2_resolve(void) > +{ > + __builtin_cpu_init (); > + if (__builtin_cpu_supports("fma")) > + return f2_fma; > + else > + return f2_default; > +} > + > +double f2(double a, double b, double c) __attribute__((ifunc("f2_resolve"))); > + > +double k2(double a, double b, double c, void **p) > +{ > + *p = f2; > + return f2(a, b, c); > +} > + > +int main() > +{ > + char buffer[256]; > + const char *expectation = "4.93038e-32, 4.93038e-32, 4.93038e-32"; > + > + volatile double a = 1.0000000000000002; > + volatile double b = -0.9999999999999998; > + volatile double c = 1.0; > + > + void *hdl = dlopen(0, RTLD_NOW); > + > + double (*pf1)(double, double, double) = dlsym(hdl, "f1"); > + double (*pk1)(double, double, double, void**) = dlsym(hdl, "k1"); > + double (*_pf1)(double, double, double); > + > + double v1_1 = pf1(a, b, c); > + double v1_2 = pk1(a, b, c, (void**)&_pf1); > + double v1_3 = _pf1(a, b, c); > + __builtin_sprintf (buffer, "%g, %g, %g", v1_1, v1_2, v1_3); > + if (__builtin_strcmp (buffer, expectation) != 0) > + __builtin_abort (); > + > + double (*pf2)(double, double, double) = dlsym(hdl, "f2"); > + double (*pk2)(double, double, double, void**) = dlsym(hdl, "k2"); > + double (*_pf2)(double, double, double); > + > + double v2_1 = pf2(a, b, c); > + double v2_2 = pk2(a, b, c, (void**)&_pf2); > + double v2_3 = _pf2(a, b, c); > + __builtin_sprintf(buffer, "%g, %g, %g", v2_1, v2_2, v2_3); > + if (__builtin_strcmp (buffer, expectation) != 0) > + __builtin_abort (); > + > + return 0; > +} > -- > 2.12.2 >