commit:     80713478a329eba635f17209a3e1ed0549062060
Author:     Sam James <sam <AT> gentoo <DOT> org>
AuthorDate: Sun Aug 17 15:10:50 2025 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Aug 17 15:10:50 2025 +0000
URL:        https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=80713478

16.0.0: update TLS patch

Signed-off-by: Sam James <sam <AT> gentoo.org>

 ...he-TLS-call-before-all-FLAGS_REG-setting-.patch | 287 ++++++++++++++++-----
 1 file changed, 227 insertions(+), 60 deletions(-)

diff --git a/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch b/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch
index 62ccbe9..a01b5bc 100644
--- a/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch
+++ b/16.0.0/gentoo/86_all_PR121572_x86-Place-the-TLS-call-before-all-FLAGS_REG-setting-.patch
@@ -1,9 +1,9 @@
-https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121572#c4
+https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121572#c6
 
-From caceadf47fe1b278311c73d5cbd062dca62298ac Mon Sep 17 00:00:00 2001
+From 27d463bd32623918d641a4f5168811ae86939e9b Mon Sep 17 00:00:00 2001
 From: "H.J. Lu" <[email protected]>
 Date: Sat, 16 Aug 2025 14:04:33 -0700
-Subject: [PATCH] x86: Place the TLS call before all FLAGS_REG setting BBs
+Subject: [PATCH] x86: Place the TLS call before all register setting BBs
 
 We can't place a TLS call before a conditional jump in a basic block like
 
@@ -18,9 +18,16 @@ We can't place a TLS call before a conditional jump in a basic block like
         (int_list:REG_BR_PROB 628353713 (nil)))
  -> 27)
 
-since the TLS call will clobber flags register.  Instead, we should place
-such call before all register setting basic blocks which dominate the
-current basic block.
+since the TLS call will clobber the flags register, nor can we place a TLS
+call in a basic block if any live caller-saved registers aren't dead at
+the end of the basic block:
+
+;; live  in      6 [bp] 7 [sp] 16 [argp] 17 [flags] 19 [frame] 104
+;; live  gen     0 [ax] 102 106 108 116 117 118 120
+;; live  kill    5 [di]
+
+Instead, we should place such call before all register setting basic
+blocks which dominate the current basic block.
 
 NB: GNU2 TLS:
 
@@ -43,30 +50,74 @@ only clobbers flags register.
 gcc/
 
        PR target/121572
-       * config/i386/i386-features.cc (ix86_place_single_tls_call): Also
-       search for REG_DEAD notes if flag register is alive.  Place the
-       TLS call before all FLAGS_REG setting BBs for conditional jump.
+       * config/i386/i386-features.cc (ix86_get_dominator_for_reg): New.
+       (ix86_place_single_tls_call): Add the live flag register to the
+       bitmap.  Clear the live register bitmap only for hard register.
+       If there is a conditional jump in the basic block or any live
+       caller-saved registers aren't dead at the end of the basic block,
+       get the basic block which dominates all basic blocks which set
+       the live registers.
 
 gcc/testsuite/
 
        PR target/121572
        * gcc.target/i386/pr121572-1a.c: New test.
        * gcc.target/i386/pr121572-1b.c: Likewise.
+       * gcc.target/i386/pr121572-2a.c: Likewise.
+       * gcc.target/i386/pr121572-2b.c: Likewise.
 
 Signed-off-by: H.J. Lu <[email protected]>
 ---
- gcc/config/i386/i386-features.cc            | 41 ++++++++++++++++++++-
- gcc/testsuite/gcc.target/i386/pr121572-1a.c | 40 ++++++++++++++++++++
- gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 +++++++++
- 3 files changed, 98 insertions(+), 1 deletion(-)
+ gcc/config/i386/i386-features.cc            | 77 ++++++++++++++++-----
+ gcc/testsuite/gcc.target/i386/pr121572-1a.c | 41 +++++++++++
+ gcc/testsuite/gcc.target/i386/pr121572-1b.c | 18 +++++
+ gcc/testsuite/gcc.target/i386/pr121572-2a.c | 55 +++++++++++++++
+ gcc/testsuite/gcc.target/i386/pr121572-2b.c | 17 +++++
+ 5 files changed, 192 insertions(+), 16 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1a.c
  create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-1b.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2a.c
+ create mode 100644 gcc/testsuite/gcc.target/i386/pr121572-2b.c
 
 diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
-index f0bdc5c1880..7cdf98c5778 100644
+index f0bdc5c1880..235c255232a 100644
 --- a/gcc/config/i386/i386-features.cc
 +++ b/gcc/config/i386/i386-features.cc
-@@ -3748,6 +3748,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+@@ -3732,6 +3732,33 @@ replace_tls_call (rtx src, auto_bitmap &tls_call_insns)
+     }
+ }
+ 
++/* Return the basic block which dominates all basic blocks which set
++   hard register REGNO used in basic block BB.  */
++
++static basic_block
++ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
++{
++  basic_block set_bb;
++  auto_bitmap set_bbs;
++
++  /* Get all BBs which set REGNO and dominate the current BB from all
++     DEFs of REGNO.  */
++  for (df_ref def = DF_REG_DEF_CHAIN (regno);
++       def;
++       def = DF_REF_NEXT_REG (def))
++    if (!DF_REF_IS_ARTIFICIAL (def)
++      && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
++      && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
++      {
++      set_bb = DF_REF_BB (def);
++      if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
++        bitmap_set_bit (set_bbs, set_bb->index);
++      }
++
++  bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
++  return bb;
++}
++
+ /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
+    at entry of the nearest dominator for basic block map BBS, which is in
+    the fake loop that contains the whole function, so that there is only
+@@ -3748,6 +3775,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
      bb = get_immediate_dominator (CDI_DOMINATORS,
                                  bb->loop_father->header);
  
@@ -74,16 +125,17 @@ index f0bdc5c1880..7cdf98c5778 100644
    rtx_insn *insn = BB_HEAD (bb);
    while (insn && !NONDEBUG_INSN_P (insn))
      {
-@@ -3837,7 +3838,7 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
-         && bitmap_bit_p (in, i))
-       bitmap_set_bit (live_caller_saved_regs, i);
+@@ -3824,7 +3852,8 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+   auto_bitmap live_caller_saved_regs;
+   bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
  
--  if (!bitmap_empty_p (live_caller_saved_regs))
-+  if (flags_live_p || !bitmap_empty_p (live_caller_saved_regs))
-     {
-       /* Search for REG_DEAD notes in this basic block.  */
-       FOR_BB_INSNS (bb, insn)
-@@ -3845,6 +3846,44 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+-  bool flags_live_p = bitmap_bit_p (in, FLAGS_REG);
++  if (bitmap_bit_p (in, FLAGS_REG))
++    bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
+ 
+   unsigned int i;
+ 
+@@ -3845,13 +3874,28 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
          if (!NONDEBUG_INSN_P (insn))
            continue;
  
@@ -96,44 +148,74 @@ index f0bdc5c1880..7cdf98c5778 100644
 +                || !(LABEL_P (label) || SYMBOL_REF_P (label)))
 +              gcc_unreachable ();
 +
-+            rtx_insn *set_insn;
-+            basic_block set_bb;
-+            auto_bitmap set_bbs;
-+
-+            /* Get all BBs which define FLAGS_REG and dominate the
-+               current BB from all DEFs of FLAGS_REG.  */
-+            for (df_ref def = DF_REG_DEF_CHAIN (FLAGS_REG);
-+                 def;
-+                 def = DF_REF_NEXT_REG (def))
-+              if (!DF_REF_IS_ARTIFICIAL (def)
-+                  && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
-+                  && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
-+                {
-+                  set_insn = DF_REF_INSN (def);
-+                  set = single_set (set_insn);
-+                  gcc_assert (set);
-+                  set_bb = DF_REF_BB (def);
-+                  if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
-+                    bitmap_set_bit (set_bbs, set_bb->index);
-+                }
-+
 +            /* Place the call before all FLAGS_REG setting BBs since
 +               we can't place a call before nor after a conditional
 +               jump.  */
-+            bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
-+                                                   set_bbs);
-+            goto place_tls_call;
++            break;
 +          }
 +
          /* Check if FLAGS register is live.  */
          set = single_set (insn);
          if (set)
+           {
+             rtx dest = SET_DEST (set);
+             if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
+-              flags_live_p = true;
++              bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
+           }
+ 
+         rtx link;
+@@ -3863,29 +3907,30 @@ ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+               for (i = REGNO (XEXP (link, 0));
+                    i < END_REGNO (XEXP (link, 0));
+                    i++)
+-                bitmap_clear_bit (live_caller_saved_regs, i);
+-
+-              /* Check if FLAGS register is dead.  */
+-              if (REGNO (XEXP (link, 0)) == FLAGS_REG)
+-                flags_live_p = false;
++                if (i < FIRST_PSEUDO_REGISTER)
++                  bitmap_clear_bit (live_caller_saved_regs, i);
+ 
+               if (bitmap_empty_p (live_caller_saved_regs))
+                 {
+-                  /* All live caller-saved registers are dead after
+-                     this instruction.  Since TLS instructions
+-                     clobber FLAGS register, it must be dead where
+-                     the TLS will be inserted after.  */
+-                  if (flags_live_p)
+-                    gcc_unreachable ();
+                   after = insn;
+                   goto insert_after;
+                 }
+             }
+       }
+ 
+-      /* All live caller-saved registers should be dead at the end
+-       of this basic block.  */
+-      gcc_unreachable ();
++      /* If any live caller-saved registers aren't dead at the end
++       of this basic block, get the basic block which dominates all
++       basic blocks which set the remaining live registers.  */
++      auto_bitmap set_bbs;
++      bitmap_iterator bi;
++      unsigned int id;
++      EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
++      {
++        basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
++        bitmap_set_bit (set_bbs, set_bb->index);
++      }
++      bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
++      goto place_tls_call;
+     }
+ 
+   /* Emit the TLS CALL insn.  */
 diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1a.c b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
 new file mode 100644
-index 00000000000..179a1a9e66c
+index 00000000000..270d8ff5cb6
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/i386/pr121572-1a.c
-@@ -0,0 +1,40 @@
+@@ -0,0 +1,41 @@
 +/* { dg-do compile { target *-*-linux* } } */
 +/* { dg-options "-O0 -fpic -fplt -mtls-dialect=gnu" } */
 +/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
@@ -151,28 +233,29 @@ index 00000000000..179a1a9e66c
 +**...
 +*/
 +
-+extern __thread int tv_cache __attribute__ ((visibility("hidden")));
++extern __thread int tv_cache __attribute__ ((visibility ("hidden")));
 +extern void use_cache (int);
 +extern int val (int v);
 +
-+__attribute__((optimize(2)))
++__attribute__ ((optimize (2)))
 +void
 +bug (void)
 +{
-+  int compared = val(-1);
++  int compared = val (-1);
 +
-+  if (compared == 0 || (compared > 0 && val(2) == 0))
++  if (compared == 0 || (compared > 0 && val (2) == 0))
 +    {
-+      __builtin_trap();
++      __builtin_trap ();
 +    }
 +
-+  if (compared < 0) {
-+      use_cache(tv_cache);
++  if (compared < 0)
++    {
++      use_cache (tv_cache);
 +      return;
-+  }
++    }
 +
-+  use_cache(tv_cache);
-+  __builtin_trap();
++  use_cache (tv_cache);
++  __builtin_trap ();
 +}
 diff --git a/gcc/testsuite/gcc.target/i386/pr121572-1b.c b/gcc/testsuite/gcc.target/i386/pr121572-1b.c
 new file mode 100644
@@ -198,5 +281,89 @@ index 00000000000..8a6089109f5
 +*/
 +
 +#include "pr121572-1a.c"
+diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2a.c b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
+new file mode 100644
+index 00000000000..3f2230f8885
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr121572-2a.c
+@@ -0,0 +1,55 @@
++/* { dg-do compile { target *-*-linux* } } */
++/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu" } */
++/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
++/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
++
++/*
++**mpfr_agm:
++**.LFB[0-9]+:
++**...
++**    movl    %edi, %ebp
++**    data16  leaq    __gmpfr_emax@tlsgd\(%rip\), %rdi
++**    .value  0x6666
++**    rex64
++**    call    __tls_get_addr@PLT
++**    mov[l|q]        mpfr_agm_compare@GOTPCREL\(%rip\), %[e|r]dx
++**    movl    \(%[e|r]dx\), %edx
++**    testl   %edx, %edx
++**    je      .L2
++**...
++*/
++
++typedef enum
++{
++  MPFR_RNDN
++} mpfr_rnd_t;
++typedef int mpfr_t[1];
++long __gmpfr_emin, mpfr_agm_expo_0;
++_Thread_local long __gmpfr_emax;
++int mpfr_agm_compare, mpfr_agm___trans_tmp_1;
++mpfr_t mpfr_agm_u;
++void mpfr_mul (int *, int, int, mpfr_rnd_t);
++int
++mpfr_agm (int op1)
++{
++  int op2 = 0;
++  if (__builtin_expect (mpfr_agm_compare == 0, 0))
++    return 0;
++  if (mpfr_agm_compare > 0)
++    {
++      int t = op1;
++      op2 = t;
++    }
++  mpfr_agm_expo_0 = __gmpfr_emax;
++  for (;;)
++    {
++    retry:
++      mpfr_mul (mpfr_agm_u, op1, op2, MPFR_RNDN);
++      if (0)
++        goto retry;
++      if (__builtin_expect (mpfr_agm___trans_tmp_1, 1))
++        break;
++    }
++  __gmpfr_emin = __gmpfr_emax;
++  return 0;
++}
+diff --git a/gcc/testsuite/gcc.target/i386/pr121572-2b.c b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
+new file mode 100644
+index 00000000000..d81e2edc6f2
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/i386/pr121572-2b.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile { target *-*-linux* } } */
++/* { dg-options "-O2 -fpic -fplt -mtls-dialect=gnu2" } */
++/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc').  */
++/* { dg-final { check-function-bodies "**" "" "" { target { ! ia32 } } {^\t?\.} } } */
++
++/*
++**mpfr_agm:
++**.LFB[0-9]+:
++**    .cfi_startproc
++**    sub[l|q]        \$[0-9]+, %[e|r]sp
++**    .cfi_def_cfa_offset [0-9]+
++**    lea[l|q]        __gmpfr_emax@TLSDESC\(%rip\), %[e|r]ax
++**    call    \*__gmpfr_emax@TLSCALL\(%[e|r]ax\)
++**...
++*/
++
++#include "pr121572-2a.c"
 -- 
 2.50.1

Reply via email to