As GCC 11 is released and the development of GCC 12 begins I would like
to send this patch again.
It is basically unchanged since the previous submission in February. The
only change I made is fixing formatting by adding a space after gcc_assert.
Please note, that the original author of the code no longer actively
contribute to GCC, so he is unlikely to review this patch. It would be
great if someone else takes a look at it.
On 03.05.2021 11:39, Ivan Sorokin wrote:
Prior to this commit GCC -O2 generated quite bad code for this
function:
bool f()
{
return __builtin_cpu_supports("popcnt")
&& __builtin_cpu_supports("ssse3");
}
f:
movl __cpu_model+12(%rip), %eax
xorl %r8d, %r8d
testb $4, %al
je .L1
shrl $6, %eax
movl %eax, %r8d
andl $1, %r8d
.L1:
movl %r8d, %eax
ret
The problem was caused by the fact that internally every invocation
of __builtin_cpu_supports built a new variable __cpu_model and a new
type __processor_model. Because of this GIMPLE level optimizers
weren't able to CSE the loads of __cpu_model and optimize
bit-operations properly.
This commit fixes the problem by caching created __cpu_model
variable and __processor_model type. Now the GCC -O2 generates:
f:
movl __cpu_model+12(%rip), %eax
andl $68, %eax
cmpl $68, %eax
sete %al
ret
gcc/ChangeLog:
PR target/91400
* config/i386/i386-builtins.c (fold_builtin_cpu): Extract
building of __cpu_model and __processor_model into new
function.
* config/i386/i386-builtins.c (init_cpu_model_var): New.
Cache creation of __cpu_model and __processor_model.
gcc/testsuite/Changelog:
PR target/91400
* gcc.target/i386/pr91400.c: New.
---
gcc/config/i386/i386-builtins.c | 27 ++++++++++++++++++-------
gcc/testsuite/gcc.target/i386/pr91400.c | 11 ++++++++++
2 files changed, 31 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr91400.c
diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c
index b66911082ab..b7d9dd18b03 100644
--- a/gcc/config/i386/i386-builtins.c
+++ b/gcc/config/i386/i386-builtins.c
@@ -2103,6 +2103,25 @@ make_var_decl (tree type, const char *name)
return new_decl;
}
+static GTY(()) tree __cpu_model_var;
+static GTY(()) tree __processor_model_type;
+
+static void
+init_cpu_model_var()
+{
+ if (__cpu_model_var != NULL_TREE)
+ {
+ gcc_assert (__processor_model_type != NULL_TREE);
+ return;
+ }
+
+ __processor_model_type = build_processor_model_struct ();
+ __cpu_model_var = make_var_decl (__processor_model_type,
+ "__cpu_model");
+
+ varpool_node::add (__cpu_model_var);
+}
+
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is
folded
into an integer defined in libgcc/config/i386/cpuinfo.c */
@@ -2114,13 +2133,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
tree param_string_cst = NULL;
- tree __processor_model_type = build_processor_model_struct ();
- tree __cpu_model_var = make_var_decl (__processor_model_type,
- "__cpu_model");
-
-
- varpool_node::add (__cpu_model_var);
-
+ init_cpu_model_var ();
gcc_assert ((args != NULL) && (*args != NULL));
param_string_cst = *args;
diff --git a/gcc/testsuite/gcc.target/i386/pr91400.c
b/gcc/testsuite/gcc.target/i386/pr91400.c
new file mode 100644
index 00000000000..e8b7d9285f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr91400.c
@@ -0,0 +1,11 @@
+/* PR target/91400 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "andl" 1 } } */
+/* { dg-final { scan-assembler-times "68" 2 } } */
+/* { dg-final { scan-assembler-not "je" } } */
+
+_Bool f()
+{
+ return __builtin_cpu_supports("popcnt") && __builtin_cpu_supports("ssse3");
+}