This patch adds support for target_clones table option. The
target_clones table option allows users to specify multiple versions
of a function and select the version at runtime based on the specified
table.
The target clone table is a JSON object where function names serve as
keys, and their values are nested objects. Each nested object maps
architecture names to lists of target clone attributes. This structure
allows specifying function clones for different architectures. Below is
an example:
```
{
"foo": {
"x86_64": ["avx2", "avx512f"],
"riscv64": ["arch=+v", "arch=+zba,+zbb", ...],
... // more architectures
},
// more functions
}
```
A example of the table is as follows on RISC-V:
C source code "ror32.c":
```
void ror32(unsigned int *a, unsigned int b, unsigned long size) {
for (unsigned long i = 0; i < size; i++) {
a[i] = a[i] >> b | (a[i] << (32 - b));
}
}
```
Table "ror32.target_clones":
```
{
"ror32": {
"riscv64": ["arch=+zvbb,+zbb", "arch=+zbb"]
}
}
```
Then use: gcc -O3 -ftarget-clones-table=ror32.target_clones -S ror32.c
to compile the source code. This will generate 3 versions and its IFUNC
resolver for the ror32 function which is "arch=+zvbb,+zbb" and
"arch=+zbb" and the default version.
Signed-off-by: Yangyu Chen <[email protected]>
gcc/ChangeLog:
* Makefile.in: Add -DTARGET_NAME to CFLAGS-multiple_target.o.
* common.opt: Add target clone table option.
* multiple_target.cc (expand_target_clones): Add support for
target_clones table option.
(node_versionable_function_p): New function to check if a function
can be versioned.
(init_clone_map): Ditto.
(ipa_target_clone): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/tct-0.c: New test.
* gcc.target/i386/tct-0.json: New test.
---
gcc/Makefile.in | 1 +
gcc/common.opt | 7 ++
gcc/multiple_target.cc | 145 +++++++++++++++++++++--
gcc/testsuite/gcc.target/i386/tct-0.c | 11 ++
gcc/testsuite/gcc.target/i386/tct-0.json | 5 +
5 files changed, 157 insertions(+), 12 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/tct-0.c
create mode 100644 gcc/testsuite/gcc.target/i386/tct-0.json
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 4503dab6037..99ed204f036 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2694,6 +2694,7 @@ s-bversion: BASE-VER
$(STAMP) s-bversion
CFLAGS-toplev.o += -DTARGET_NAME=\"$(target_noncanonical)\"
+CFLAGS-multiple_target.o += -DTARGET_NAME=\"$(target_noncanonical)\"
CFLAGS-tree-diagnostic-client-data-hooks.o +=
-DTARGET_NAME=\"$(target_noncanonical)\"
CFLAGS-optinfo-emit-json.o += -DTARGET_NAME=\"$(target_noncanonical)\"
$(ZLIBINC)
CFLAGS-analyzer/engine.o += $(ZLIBINC)
diff --git a/gcc/common.opt b/gcc/common.opt
index f6d93dc05fb..102e03496b8 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2718,6 +2718,13 @@ fprofile-reorder-functions
Common Var(flag_profile_reorder_functions) Optimization
Enable function reordering that improves code placement.
+ftarget-clones-table=
+Common Joined RejectNegative Var(target_clones_table)
+Enable target clones attributes specified in the JSON file.
+
+Variable
+const char *target_clones_table = NULL
+
fpatchable-function-entry=
Common Var(flag_patchable_function_entry) Joined Optimization
Insert NOP instructions at each function entry.
diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
index e85d7e71442..c5d0a79e82d 100644
--- a/gcc/multiple_target.cc
+++ b/gcc/multiple_target.cc
@@ -21,6 +21,10 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
+#include <fstream>
+#define INCLUDE_MAP
+#define INCLUDE_STRING
+#define INCLUDE_SSTREAM
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -38,6 +42,7 @@ along with GCC; see the file COPYING3. If not see
#include "gimple-walk.h"
#include "tree-inline.h"
#include "intl.h"
+#include "json-parsing.h"
/* Walker callback that replaces all FUNCTION_DECL of a function that's
going to be versioned. */
@@ -252,27 +257,51 @@ create_target_clone (cgraph_node *node, bool definition,
char *name,
return new_node;
}
+/* Skip functions that are declared but not defined. Also skip
+ C++ virtual functions, as they cannot be cloned. */
+static bool node_versionable_function_p (cgraph_node *node)
+{
+ return (!node->definition
+ || (!node->alias && tree_versionable_function_p (node->decl)))
+ && !DECL_VIRTUAL_P (node->decl);
+}
+
/* If the function in NODE has multiple target attributes
create the appropriate clone for each valid target attribute. */
static bool
-expand_target_clones (struct cgraph_node *node, bool definition)
+expand_target_clones (struct cgraph_node *node, bool definition,
+ std::map <std::string, auto_vec<string_slice> >
+ &clone_map)
{
- /* Parsing target attributes separated by TARGET_CLONES_ATTR_SEPARATOR. */
- tree attr_target = lookup_attribute ("target_clones",
- DECL_ATTRIBUTES (node->decl));
- /* No targets specified. */
- if (!attr_target)
- return false;
-
int num_defaults = 0;
auto_vec<string_slice> attr_list = get_clone_versions (node->decl,
&num_defaults);
+ /* Skip functions that are declared but not defined. */
+ if (DECL_INITIAL (node->decl) != NULL_TREE)
+ {
+ auto it = clone_map.find (IDENTIFIER_POINTER (
+ DECL_ASSEMBLER_NAME_RAW (node->decl)));
+ if (it != clone_map.end () && node_versionable_function_p (node))
+ {
+ for (string_slice attr : it->second)
+ attr_list.safe_push (attr);
- /* If the target clones list is empty after filtering, remove this node. */
- if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE && attr_list.is_empty ())
+ if (num_defaults == 0) /* No default in the source, add one. */
+ {
+ attr_list.safe_push ("default");
+ num_defaults = 1;
+ }
+ }
+ }
+
+ /* If there is no target_clones attribute, nothing to do. */
+ if (attr_list.is_empty ())
{
- node->remove ();
+ /* If the target clones list is empty after filtering,
+ remove this node. */
+ if (!TARGET_HAS_FMV_TARGET_ATTRIBUTE)
+ node->remove ();
return false;
}
@@ -506,11 +535,103 @@ is_simple_target_clones_case (cgraph_node *node)
return true;
}
+/* Initialize the clone map from the target clone table JSON file. Specified
+ by the -ftarget-clone-table option. The map is a mapping from symbol name
+ to a string with target clones attributes separated by
+ TARGET_CLONES_ATTR_SEPARATOR. */
+static std::map <std::string, auto_vec<string_slice> >
+init_clone_map (void)
+{
+ std::map <std::string, auto_vec<string_slice> > res;
+ if (! target_clones_table)
+ return res;
+
+ /* Take target string from TARGET_NAME, this macro looks like
+ "x86_64-linux-gnu" and we need to strip all the suffixes
+ after the first dash, so it becomes "x86_64". */
+ std::string target = TARGET_NAME;
+ if (target.find ('-') != std::string::npos)
+ target.erase (target.find ('-'));
+
+ /* Open the target clone table file and read to a string. */
+ std::ifstream json_file (target_clones_table);
+ if (json_file.fail ())
+ {
+ error ("cannot open target clone table file %s",
+ target_clones_table);
+ return res;
+ }
+ std::stringstream ss_buf;
+ ss_buf << json_file.rdbuf ();
+ std::string json_str = ss_buf.str ();
+
+ /* Parse the JSON string.
+ The JSON string format looks like this:
+ {
+ "symbol_name1": {
+ "target1": ["clone1", "clone2", ...],
+ "target2": ["clone1", "clone2", ...],
+ },
+ ...
+ }
+ where symbol_name is the ASM name of the function mangled by the
+ frontend. The target1 and target2 are the targets, which can be
+ "x86_64", "aarch64", "riscv64", etc. The clone1, clone2, etc are the
+ target clones attributes, which can be "avx2", "avx512" etc. Note that
+ there is no need to specify the "default" target clone, it is
+ automatically added by the pass. */
+ json::parser_result_t result = json::parse_utf8_string (
+ json_str.size (), json_str.c_str (), true, NULL);
+ if (auto json_err = result.m_err.get ())
+ {
+ error ("error parsing target clone table file %s: %s",
+ target_clones_table, json_err->get_msg ());
+ return res;
+ }
+
+ auto json_val = result.m_val.get ();
+ auto kind = json_val->get_kind ();
+ if (kind != json::JSON_OBJECT)
+ {
+ error ("target clone table file %s is not a JSON object",
+ target_clones_table);
+ return res;
+ }
+ auto json_obj = static_cast<const json::object *> (json_val);
+ unsigned i;
+ const char *symbol_name;
+ FOR_EACH_VEC_ELT (*json_obj, i, symbol_name)
+ {
+ auto symbol_val = json_obj->get (symbol_name);
+ if (!symbol_val || symbol_val->get_kind () != json::JSON_OBJECT)
+ continue;
+ auto symbol_obj = static_cast<const json::object *> (symbol_val);
+ auto cur_target_val = symbol_obj->get (target.c_str ());
+ if (!cur_target_val
+ || cur_target_val->get_kind () != json::JSON_ARRAY)
+ continue;
+ auto cur_target_array = static_cast<const json::array *>
+ (cur_target_val);
+ for (unsigned j = 0; j < cur_target_array->length (); j++)
+ {
+ auto target_str_val = cur_target_array->get (j);
+ if (target_str_val->get_kind () != json::JSON_STRING)
+ error ("target clones attribute is not a string");
+ res[symbol_name].safe_push (string_slice (
+ ggc_strdup (static_cast<const json::string *>
+ (target_str_val)->get_string ())));
+ }
+ }
+ return res;
+}
+
static unsigned int
ipa_target_clone (bool early)
{
struct cgraph_node *node;
auto_vec<cgraph_node *> to_dispatch;
+ std::map <std::string, auto_vec<string_slice> > clone_map
+ = init_clone_map ();
/* Don't need to do anything early for target attribute semantics. */
if (early && TARGET_HAS_FMV_TARGET_ATTRIBUTE)
@@ -543,7 +664,7 @@ ipa_target_clone (bool early)
the simple case. Simple cases are dispatched in the later stage. */
if (early == !is_simple_target_clones_case (node))
- if (expand_target_clones (node, node->definition)
+ if (expand_target_clones (node, node->definition, clone_map)
&& TARGET_HAS_FMV_TARGET_ATTRIBUTE)
/* In non target_version semantics, dispatch all target clones. */
to_dispatch.safe_push (node);
diff --git a/gcc/testsuite/gcc.target/i386/tct-0.c
b/gcc/testsuite/gcc.target/i386/tct-0.c
new file mode 100644
index 00000000000..0de0938e9fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/tct-0.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-ftarget-clones-table=${srcdir}/gcc.target/i386/tct-0.json" }
*/
+/* { dg-final { scan-assembler "foo\.default" } } */
+/* { dg-final { scan-assembler "foo\.arch_x86_64_v2" } } */
+/* { dg-final { scan-assembler "foo\.arch_x86_64_v3" } } */
+/* { dg-final { scan-assembler "foo\.arch_x86_64_v4" } } */
+
+void foo() {
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/tct-0.json
b/gcc/testsuite/gcc.target/i386/tct-0.json
new file mode 100644
index 00000000000..9dc712e66e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/tct-0.json
@@ -0,0 +1,5 @@
+{
+ "foo": {
+ "x86_64": ["arch=x86-64-v2", "arch=x86-64-v3", "arch=x86-64-v4"]
+ }
+}
--
2.49.0