Hi Richard,

On 2019/8/13 17:10, Richard Biener wrote:
On Tue, Aug 13, 2019 at 4:22 AM luoxhu <luo...@linux.ibm.com> wrote:

Hi Richard,

On 2019/8/12 16:51, Richard Biener wrote:
On Mon, Aug 12, 2019 at 8:50 AM luoxhu <luo...@linux.ibm.com> wrote:

Hi Richard,
Thanks for your comments, updated the v2 patch as below:
1. Define and use builtin_with_linkage_p.
2. Add comments.
3. Add a testcase.

In LTO mode, if static library and dynamic library contains same
function and both libraries are passed as arguments, linker will link
the function in dynamic library no matter the sequence.  This patch
will output LTO symbol node as UNDEF if BUILT_IN_NORMAL function FNDECL
is a math function, then the function in static library will be linked
first if its sequence is ahead of the dynamic library.

Comments below

gcc/ChangeLog

          2019-08-12  Xiong Hu Luo  <luo...@linux.ibm.com>

          PR lto/91287
          * builtins.c (builtin_with_linkage_p): New function.
          * builtins.h (builtin_with_linkage_p): New function.
          * symtab.c (write_symbol): Use builtin_with_linkage_p.
          * lto-streamer-out.c (symtab_node::output_to_lto_symbol_table_p):
          Likewise.

gcc/testsuite/ChangeLog

          2019-08-12  Xiong Hu Luo  <luo...@linux.ibm.com>

          PR lto/91287
          * gcc.dg/pr91287.c: New testcase.
---
   gcc/builtins.c                 | 89 ++++++++++++++++++++++++++++++++++
   gcc/builtins.h                 |  2 +
   gcc/lto-streamer-out.c         |  4 +-
   gcc/symtab.c                   | 13 ++++-
   gcc/testsuite/gcc.dg/pr91287.c | 40 +++++++++++++++
   5 files changed, 145 insertions(+), 3 deletions(-)
   create mode 100644 gcc/testsuite/gcc.dg/pr91287.c

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 695a9d191af..f4dea941a27 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -11244,3 +11244,92 @@ target_char_cst_p (tree t, char *p)
     *p = (char)tree_to_uhwi (t);
     return true;
   }
+
+/* Return true if DECL is a specified builtin math function.  These functions
+   should have symbol in symbol table to provide linkage with faster version of
+   libraries.  */

The comment should read like

/* Return true if the builtin DECL is implemented in a standard
library.  Otherwise
     returns false which doesn't guarantee it is not (thus the list of
handled builtins
     below may be incomplete).  */

+bool
+builtin_with_linkage_p (tree decl)
+{
+  if (!decl)
+    return false;

Omit this check please.

+  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
+    switch (DECL_FUNCTION_CODE (decl))
+    {
+      CASE_FLT_FN (BUILT_IN_ACOS):
+      CASE_FLT_FN (BUILT_IN_ACOSH):
+      CASE_FLT_FN (BUILT_IN_ASIN):
+      CASE_FLT_FN (BUILT_IN_ASINH):
+      CASE_FLT_FN (BUILT_IN_ATAN):
+      CASE_FLT_FN (BUILT_IN_ATANH):
+      CASE_FLT_FN (BUILT_IN_ATAN2):
+      CASE_FLT_FN (BUILT_IN_CBRT):
+      CASE_FLT_FN (BUILT_IN_CEIL):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_CEIL):
+      CASE_FLT_FN (BUILT_IN_COPYSIGN):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
+      CASE_FLT_FN (BUILT_IN_COS):
+      CASE_FLT_FN (BUILT_IN_COSH):
+      CASE_FLT_FN (BUILT_IN_ERF):
+      CASE_FLT_FN (BUILT_IN_ERFC):
+      CASE_FLT_FN (BUILT_IN_EXP):
+      CASE_FLT_FN (BUILT_IN_EXP2):
+      CASE_FLT_FN (BUILT_IN_EXPM1):
+      CASE_FLT_FN (BUILT_IN_FABS):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_FABS):
+      CASE_FLT_FN (BUILT_IN_FDIM):
+      CASE_FLT_FN (BUILT_IN_FLOOR):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_FLOOR):
+      CASE_FLT_FN (BUILT_IN_FMA):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_FMA):
+      CASE_FLT_FN (BUILT_IN_FMAX):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_FMAX):
+      CASE_FLT_FN (BUILT_IN_FMIN):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_FMIN):
+      CASE_FLT_FN (BUILT_IN_FMOD):
+      CASE_FLT_FN (BUILT_IN_FREXP):
+      CASE_FLT_FN (BUILT_IN_HYPOT):
+      CASE_FLT_FN (BUILT_IN_ILOGB):
+      CASE_FLT_FN (BUILT_IN_LDEXP):
+      CASE_FLT_FN (BUILT_IN_LGAMMA):
+      CASE_FLT_FN (BUILT_IN_LLRINT):
+      CASE_FLT_FN (BUILT_IN_LLROUND):
+      CASE_FLT_FN (BUILT_IN_LOG):
+      CASE_FLT_FN (BUILT_IN_LOG10):
+      CASE_FLT_FN (BUILT_IN_LOG1P):
+      CASE_FLT_FN (BUILT_IN_LOG2):
+      CASE_FLT_FN (BUILT_IN_LOGB):
+      CASE_FLT_FN (BUILT_IN_LRINT):
+      CASE_FLT_FN (BUILT_IN_LROUND):
+      CASE_FLT_FN (BUILT_IN_MODF):
+      CASE_FLT_FN (BUILT_IN_NAN):
+      CASE_FLT_FN (BUILT_IN_NEARBYINT):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_NEARBYINT):
+      CASE_FLT_FN (BUILT_IN_NEXTAFTER):
+      CASE_FLT_FN (BUILT_IN_NEXTTOWARD):
+      CASE_FLT_FN (BUILT_IN_POW):
+      CASE_FLT_FN (BUILT_IN_REMAINDER):
+      CASE_FLT_FN (BUILT_IN_REMQUO):
+      CASE_FLT_FN (BUILT_IN_RINT):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_RINT):
+      CASE_FLT_FN (BUILT_IN_ROUND):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_ROUND):
+      CASE_FLT_FN (BUILT_IN_SCALBLN):
+      CASE_FLT_FN (BUILT_IN_SCALBN):
+      CASE_FLT_FN (BUILT_IN_SIN):
+      CASE_FLT_FN (BUILT_IN_SINH):
+      CASE_FLT_FN (BUILT_IN_SINCOS):
+      CASE_FLT_FN (BUILT_IN_SQRT):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_SQRT):
+      CASE_FLT_FN (BUILT_IN_TAN):
+      CASE_FLT_FN (BUILT_IN_TANH):
+      CASE_FLT_FN (BUILT_IN_TGAMMA):
+      CASE_FLT_FN (BUILT_IN_TRUNC):
+      CASE_FLT_FN_FLOATN_NX (BUILT_IN_TRUNC):
+       return true;
+      default:
+       break;
+    }
+  return false;
+}
diff --git a/gcc/builtins.h b/gcc/builtins.h
index 1ffb491d785..91cbd81be48 100644
--- a/gcc/builtins.h
+++ b/gcc/builtins.h
@@ -151,4 +151,6 @@ extern internal_fn replacement_internal_fn (gcall *);
   extern void warn_string_no_nul (location_t, const char *, tree, tree);
   extern tree unterminated_array (tree, tree * = NULL, bool * = NULL);

+extern bool builtin_with_linkage_p (tree);

no vertical space above this line.

+
   #endif /* GCC_BUILTINS_H */
diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
index 47a9143ae26..73bf45810f0 100644
--- a/gcc/lto-streamer-out.c
+++ b/gcc/lto-streamer-out.c
@@ -2644,7 +2644,9 @@ write_symbol (struct streamer_tree_cache_d *cache,

     gcc_checking_assert (TREE_PUBLIC (t)
                         && (TREE_CODE (t) != FUNCTION_DECL
-                          || !fndecl_built_in_p (t))
+                          || cgraph_node::get (t)->definition
+                          || !fndecl_built_in_p (t, BUILT_IN_NORMAL)
+                          || builtin_with_linkage_p (t))
                         && !DECL_ABSTRACT_P (t)
                         && (!VAR_P (t) || !DECL_HARD_REGISTER (t)));

Please remove the whole assert - both callers are guarded by
symtab_node::output_to_lto_symbol_table_p which means it is redundant
and fragile to keep in-sync.

diff --git a/gcc/symtab.c b/gcc/symtab.c
index 63e2820eb93..e806afdede4 100644
--- a/gcc/symtab.c
+++ b/gcc/symtab.c
@@ -2377,8 +2377,17 @@ symtab_node::output_to_lto_symbol_table_p (void)
       return false;
     /* FIXME: Builtins corresponding to real functions probably should have
        symbol table entries.  */

Remove this comment

-  if (TREE_CODE (decl) == FUNCTION_DECL && fndecl_built_in_p (decl))
-    return false;
+  if (TREE_CODE (decl) == FUNCTION_DECL && !definition
+      && fndecl_built_in_p (decl))
+    {
+      /* Math functions may have faster version in static library, output the
+        symbol for linkage.  Functions have implementation in libgcc will be
+        excluded as the symbol name may mismatch.  */

    /* Builtins like those for most math functions have actual implementations 
in
       libraries so make sure to output references into the symbol table to make
       those libraries referenced.  Note this is incomplete handling for now and
       only covers math functions.  */

+      if (builtin_with_linkage_p (decl))
+       return true;
+      else
+       return false;
+    }

     /* We have real symbol that should be in symbol table.  However try to trim
        down the refernces to libraries bit more because linker will otherwise
diff --git a/gcc/testsuite/gcc.dg/pr91287.c b/gcc/testsuite/gcc.dg/pr91287.c
new file mode 100644
index 00000000000..c816e0537aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr91287.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2" } */

You don't use -flto here so the testcase doesn't exercise any of the patched
code.  Does it work when you add -flto here?  That is, do
scan-symbol[-not] properly use gcc-nm or the linker plugin?

-flto is needed here to check this patch correctness, my mistake here,
thanks for catching.  atan2 will exists in pr91287.o even without lto as
pr91287.o has the instruction "bl atan2".

Sure, that's expected.  It is expected that without the patch but with -flto
the symbol doesn't appear.

Note that to fail we rely on -fno-fat-lto-objects and linker plugin
availability.
So you should add

/* { dg-require-linker-plugin "" } */

after /* { dg-do assemble } */

and use -O2 -flto -fno-fat-lto-objects -fuse-linker-plugin

the other issue is that scan-symbol invokes 'nm' without any plugin
arguments so we rely on plugin auto-loading and a system-installed
plugin.  We could use built gcc-nm here if present but would need to
pass an appropriate -B argument, see how we find xg++ in g++.exp
and scan-symbol-common in gcc-dg.exp.  It might be better / easier
to move the testcase to gcc.dg/lto/ but lto.exp might not support
scan-symbol ...

So...  I think the patch is ok as-is if you omit the testcase which needs
more work to be useful (and not sure if worth all the trouble).

Committed the patch as r274411.
Since this patch could leverage the IBM MASS library and provides about
GEOMEAN 9% improvement for fprate(no change to intrate), especially for
521.wrf_r (+48%),527.cam4_r (+30%) and 554.roms_r(+15%), backport it to
gcc-9-branch, gcc-8-branch and even gcc-7-branch?

503.bwaves_r    214     214     0.00%
508.namd_r      287     287     0.00%
510.parest_r    1121    1138    -1.52%
511.povray_r    1049    1049    0.00%
519.lbm_r       166     166     0.00%
521.wrf_r       1095    564     48.49%
526.blender_r   606     603     0.50%
527.cam4_r      487     339     30.39%
538.imagick_r   507     531     -4.73%
544.nab_r       451     451     0.00%
549.fotonik3d_r 332     332     0.00%
554.roms_r      370     313     15.41%
specrate        497.21  471.04  5.26%
intrate         539.88  539.94  -0.01%
fprate          467.44  425.19  9.04%

Thanks
Xionghu


Thanks,
Richard.

After adding -flto the case also works as symbol is written to pr91287.o.
PS: update other changes in patch attached.



Xionghu


+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <stdlib.h>
+
+double __attribute__((noinline)) zowie (double x, double y, double z)
+{
+  x = __builtin_clz(x);
+  return atan2 (x * y, z);
+}
+
+double __attribute__((noinline)) rand_finite_double (void)
+{
+  union {
+    double d;
+    unsigned char uc[sizeof(double)];
+  } u;
+  do {
+    for (unsigned i = 0; i < sizeof u.uc; i++) {
+      u.uc[i] = (unsigned char) rand();
+    }
+  } while (!isfinite(u.d));
+  return u.d;
+}
+
+
+int main ()
+{
+  char str[100];
+  strcpy(str, "test\n");
+  double a = rand_finite_double ();
+  printf ("%lf, %s\n", zowie (a, 4.5, 2.2), str);
+  return 0;
+}
+
+/* { dg-final { scan-symbol "atan2" } } */
+/* { dg-final { scan-symbol-not "__builtin_clz" } } */
--
2.21.0.777.g83232e3864




Reply via email to